class IEXEarnings(DataSet):
    """Dataset declaring one column per field of an earnings record.

    Sample record kept from the original documentation; its keys match
    the column names declared below::

        "actualEPS": 2.46,
        "consensusEPS": 2.36,
        "announceTime": "AMC",
        "numberOfEstimates": 34,
        "EPSSurpriseDollar": 0.1,
        "EPSReportDate": "2019-04-30",
        "fiscalPeriod": "Q1 2019",
        "fiscalEndDate": "2019-03-31",
        "yearAgo": 2.73,
        "yearAgoChangePercent": -0.0989
    """

    # Text fields: the empty string is the missing value.
    announceTime = Column(object_dtype, missing_value='')
    fiscalPeriod = Column(object_dtype, missing_value='')

    # Date fields: 1970-01-01 is the missing value.
    EPSReportDate = Column(
        datetime64ns_dtype,
        missing_value=np.datetime64('1970-01-01'),
    )
    fiscalEndDate = Column(
        datetime64ns_dtype,
        missing_value=np.datetime64('1970-01-01'),
    )

    # Numeric fields: NaN is the missing value.
    actualEPS = Column(float64_dtype, missing_value=np.nan)
    consensusEPS = Column(float64_dtype, missing_value=np.nan)
    numberOfEstimates = Column(float64_dtype, missing_value=np.nan)
    EPSSurpriseDollar = Column(float64_dtype, missing_value=np.nan)
    yearAgo = Column(float64_dtype, missing_value=np.nan)
    yearAgoChangePercent = Column(float64_dtype, missing_value=np.nan)

    # One loader instance, created when the class body executes.
    _loader = IEXEarningsLoader()

    @classmethod
    def get_loader(cls):
        """Return the loader instance stored on the class."""
        return cls._loader
def infer_dataset(data_path, fields=None):
    """Build a ``DataSet`` subclass from the ``/dtypes`` entry of an HDF store.

    Parameters
    ----------
    data_path : str
        Path passed to ``pd.HDFStore``.
    fields : container of str, optional
        When given, only entries of ``/dtypes`` whose key is contained in
        ``fields`` become columns. ``None`` (the default) keeps every entry.

    Returns
    -------
    ds : type
        A new class named ``"Dataset"`` deriving from ``DataSet`` with one
        ``Column`` per selected entry. Entries whose dtype compares equal to
        ``'O'`` get ``missing_value='NA'``; all others use the ``Column``
        default.
    """
    # Only the lookup needs the store open; the result is iterated after
    # the store has been closed again.
    # NOTE(review): assumes ``/dtypes`` is a pandas Series mapping column
    # name -> dtype -- confirm against the code that writes the store.
    with pd.HDFStore(data_path) as store:
        dtypes = store['/dtypes']

    columns = {}
    # ``iteritems`` was removed in pandas 2.0; ``items`` yields the same
    # (key, value) pairs and is also available on older pandas versions.
    for name, dtype in dtypes.items():
        if fields is not None and name not in fields:
            continue
        if dtype == 'O':
            columns[name] = Column(dtype, missing_value='NA')
        else:
            columns[name] = Column(dtype)

    return type("Dataset", (DataSet, ), columns)
class USEquityPricing(DataSet):
    """
    Daily trading prices and volumes, declared as five float64 columns.
    """

    # Price fields.
    open = Column(float64_dtype)
    high = Column(float64_dtype)
    low = Column(float64_dtype)
    close = Column(float64_dtype)

    # Traded volume.
    volume = Column(float64_dtype)

    @staticmethod
    def get_loader():
        """Return the ``_loader`` object resolved from the enclosing module."""
        return _loader
def new_dataset(expr, missing_values, domain):
    """
    Create a dataset type from a blaze expression.

    Parameters
    ----------
    expr : Expr
        The blaze expression representing the values.
    missing_values : frozenset((name, value) pairs
        Pairs associating a column name with the missing_value for that
        column. A frozenset is used (rather than a dict or tuple of tuples)
        so the collection is unordered but still hashable.
    domain : zipline.pipeline.domain.Domain
        Domain stored on the created dataset under the ``domain`` attribute.

    Returns
    -------
    ds : type
        A new dataset type deriving from ``DataSet``.

    Raises
    ------
    ValueError
        If the expression has a field named ``'domain'``, which is reserved.

    Notes
    -----
    The original documentation states that this function is memoized, so
    repeated calls with the same inputs return the same type. The memoizing
    wrapper is not part of this block.
    """
    fill_values = dict(missing_values)

    # ndim is 2 when the expression carries a sid field, otherwise 1.
    namespace = {'ndim': 2 if SID_FIELD_NAME in expr.fields else 1}

    for field_name, field_type in expr.dshape.measure.fields:
        # sid and timestamp never become columns: they are implicitly the
        # labels of the arrays that will be passed to pipeline Terms.
        if field_name in (SID_FIELD_NAME, TS_FIELD_NAME):
            continue

        numpy_type = datashape_type_to_numpy(field_type)
        if not can_represent_dtype(numpy_type):
            namespace[field_name] = NonPipelineField(field_name, numpy_type)
        else:
            namespace[field_name] = Column(
                numpy_type,
                fill_values.get(field_name, NotSpecified),
            )

    if 'domain' in namespace:
        raise ValueError("Got a column named 'domain' in new_dataset(). "
                         "'domain' is reserved.")
    namespace['domain'] = domain

    # Fall back to a generated name when the expression has none.
    dataset_name = expr._name
    if dataset_name is None:
        dataset_name = next(_new_names)

    # ``unicode`` is a NameError on py3, but this branch is only reached
    # when running under python 2.
    if PY2 and isinstance(dataset_name, unicode):  # pragma: no cover # noqa
        dataset_name = dataset_name.encode('utf-8')

    return type(dataset_name, (DataSet, ), namespace)
def test_construction_error_message(self):
    """A currency-aware datetime Column must raise ValueError with the
    exact message asserted below."""
    expected_message = (
        'Columns cannot be constructed with currency_aware=True, '
        'dtype=datetime64[ns]. Currency aware columns must have a float64 '
        'dtype.'
    )

    with self.assertRaises(ValueError) as caught:
        Column(dtype=datetime64ns_dtype, currency_aware=True)

    self.assertEqual(str(caught.exception), expected_message)
def new_dataset(expr, deltas, missing_values):
    """
    Create a dataset type from a pair of blaze expressions.

    Parameters
    ----------
    expr : Expr
        The blaze expression representing the first known values.
    deltas : Expr
        The blaze expression representing the deltas to the data. It is
        not read by this function body.
    missing_values : frozenset((name, value) pairs
        Pairs associating a column name with the missing_value for that
        column. A frozenset is used (rather than a dict or tuple of tuples)
        so the collection is unordered but still hashable.

    Returns
    -------
    ds : type
        A new dataset type deriving from ``DataSet``.

    Notes
    -----
    The original documentation states that this function is memoized, so
    repeated calls with the same inputs return the same type. The memoizing
    wrapper is not part of this block.
    """
    fill_values = dict(missing_values)
    members = {}

    for field_name, dshape_type in expr.dshape.measure.fields:
        # sid and timestamp never become columns: they are implicitly the
        # labels of the arrays that will be passed to pipeline Terms.
        if field_name in (SID_FIELD_NAME, TS_FIELD_NAME):
            continue

        numpy_type = datashape_type_to_numpy(dshape_type)
        if can_represent_dtype(numpy_type):
            member = Column(
                numpy_type,
                fill_values.get(field_name, NotSpecified),
            )
        else:
            member = NonPipelineField(field_name, numpy_type)
        members[field_name] = member

    # Fall back to a generated name when the expression has none.
    dataset_name = expr._name
    if dataset_name is None:
        dataset_name = next(_new_names)

    # ``unicode`` is a NameError on py3, but this branch is only reached
    # when running under python 2.
    if PY2 and isinstance(dataset_name, unicode):  # noqa
        dataset_name = dataset_name.encode('utf-8')

    return type(dataset_name, (DataSet, ), members)
class IEXCompany(DataSet):
    """Dataset of company attributes.

    Every column has object dtype and uses the empty string as its missing
    value.
    """

    symbol = Column(object_dtype, missing_value='')
    companyName = Column(object_dtype, missing_value='')
    exchange = Column(object_dtype, missing_value='')
    industry = Column(object_dtype, missing_value='')
    website = Column(object_dtype, missing_value='')
    description = Column(object_dtype, missing_value='')
    CEO = Column(object_dtype, missing_value='')
    issueType = Column(object_dtype, missing_value='')
    sector = Column(object_dtype, missing_value='')
    # tags = Column(object_dtype, missing_value='')

    # One loader instance, created when the class body executes.
    _loader = IEXCompanyLoader()

    @classmethod
    def get_loader(cls):
        """Return the loader instance stored on the class."""
        return cls._loader
class FundamentalsDataSet(DataSet):
    """
    :class:`~zipline.pipeline.data.DataSet` declaring fundamentals columns.

    Five columns are float64, ``ipo_date_test`` is declared with
    ``datetime`` and ``industry_id`` with object dtype.
    """

    # float64 columns (note that the two date-named fields are float64 too).
    ROEAVE3 = Column(float64_dtype)
    total_share_0QE = Column(float64_dtype)
    ipo_date = Column(float64_dtype)
    delist_date = Column(float64_dtype)
    IndustryId = Column(float64_dtype)

    # NOTE(review): ``datetime`` is whatever that name is bound to in this
    # module; it is not one of the ``*_dtype`` constants used by the other
    # columns -- confirm this is the intended dtype.
    ipo_date_test = Column(datetime)

    industry_id = Column(object_dtype)
class EquityPricing(DataSet):
    """
    :class:`~zipline.pipeline.data.DataSet` containing daily trading prices
    and volumes.

    The four price columns are constructed with ``currency_aware=True``;
    ``volume`` is a plain float64 column and ``currency`` is categorical.
    """

    # Currency-aware price columns.
    open = Column(float64_dtype, currency_aware=True)
    high = Column(float64_dtype, currency_aware=True)
    low = Column(float64_dtype, currency_aware=True)
    close = Column(float64_dtype, currency_aware=True)

    # Not currency aware.
    volume = Column(float64_dtype)
    currency = Column(categorical_dtype)
def new_dataset(expr, deltas):
    """Create a dataset type from a pair of blaze expressions.

    Parameters
    ----------
    expr : Expr
        The blaze expression representing the first known values.
    deltas : Expr
        The blaze expression representing the deltas to the data. It is
        not read by this function body.

    Returns
    -------
    ds : type
        A new dataset type deriving from ``DataSet``.

    Notes
    -----
    The original documentation states that this function is memoized, so
    repeated calls with the same inputs return the same type. The memoizing
    wrapper is not part of this block.
    """
    members = {}

    for field_name, field_type in expr.dshape.measure.fields:
        try:
            # Only fields whose type promotes to exactly float64 become
            # pipeline columns.
            if promote(field_type, float64, promote_option=False) != float64:
                raise NotPipelineCompatible()
            # Unwrap Option types so the numpy dtype comes from the
            # underlying type.
            unwrapped = (
                field_type.ty if isinstance(field_type, Option)
                else field_type
            )
        except NotPipelineCompatible:
            member = NonPipelineField(field_name, field_type)
        except TypeError:
            member = NonNumpyField(field_name, field_type)
        else:
            member = Column(unwrapped.to_numpy_dtype())

        members[field_name] = member

    # Fall back to a generated name when the expression has none.
    dataset_name = expr._name
    if dataset_name is None:
        dataset_name = next(_new_names)

    # ``unicode`` is a NameError on py3, but this branch is only reached
    # when running under python 2.
    if PY2 and isinstance(dataset_name, unicode):  # noqa
        dataset_name = dataset_name.encode('utf-8')

    return type(dataset_name, (DataSet, ), members)
class IEXKeyStats(DataSet):
    """Dataset declaring one column per key-stats field.

    Sample record kept from the original documentation::

        "companyName": "Apple Inc.", "marketcap": 760334287200,
        "beta": 1.295227, "week52high": 156.65, "week52low": 93.63,
        "week52change": 58.801903, "shortInterest": 55544287,
        "shortDate": "2017-06-15", "dividendRate": 2.52,
        "dividendYield": 1.7280395,
        "exDividendDate": "2017-05-11 00:00:00.0", "latestEPS": 8.29,
        "latestEPSDate": "2016-09-30", "sharesOutstanding": 5213840000,
        "float": 5203997571, "returnOnEquity": 0.08772939519857577,
        "consensusEPS": 3.22, "numberOfEstimates": 15, "symbol": "AAPL",
        "EBITDA": 73828000000, "revenue": 220457000000,
        "grossProfit": 84686000000, "cash": 256464000000,
        "debt": 358038000000, "ttmEPS": 8.55,
        "revenuePerShare": 42.2830389885382,
        "revenuePerEmployee": 1900491.3793103448, "peRatioHigh": 25.5,
        "peRatioLow": 8.7, "EPSSurpriseDollar": null,
        "EPSSurprisePercent": 3.9604, "returnOnAssets": 14.15,
        "returnOnCapital": null, "profitMargin": 20.73,
        "priceToSales": 3.6668503, "priceToBook": 6.19,
        "day200MovingAvg": 140.60541, "day50MovingAvg": 156.49678,
        "institutionPercent": 32.1, "insiderPercent": null,
        "shortRatio": 1.6915414,
        "year5ChangePercent": 0.5902546932200027,
        "year2ChangePercent": 0.3777449874142869,
        "year1ChangePercent": 0.39751716851558366,
        "ytdChangePercent": 0.36659492036160124,
        "month6ChangePercent": 0.12208398133748043,
        "month3ChangePercent": 0.08466584665846649,
        "month1ChangePercent": 0.009668596145283263,
        "day5ChangePercent": -0.005762605699968781
    """

    # Object columns here rely on the Column default missing value.
    companyName = Column(object_dtype)

    marketcap = Column(float64_dtype, missing_value=np.nan)
    beta = Column(float64_dtype, missing_value=np.nan)
    week52high = Column(float64_dtype, missing_value=np.nan)
    week52low = Column(float64_dtype, missing_value=np.nan)
    week52change = Column(float64_dtype, missing_value=np.nan)
    shortInterest = Column(float64_dtype, missing_value=np.nan)
    # Date columns use 1970-01-01 as the missing value.
    shortDate = Column(
        datetime64ns_dtype,
        missing_value=np.datetime64('1970-01-01'),
    )
    dividendRate = Column(float64_dtype, missing_value=np.nan)
    dividendYield = Column(float64_dtype, missing_value=np.nan)
    exDividendDate = Column(
        datetime64ns_dtype,
        missing_value=np.datetime64('1970-01-01'),
    )
    latestEPS = Column(float64_dtype, missing_value=np.nan)
    latestEPSDate = Column(
        datetime64ns_dtype,
        missing_value=np.datetime64('1970-01-01'),
    )
    sharesOutstanding = Column(float64_dtype, missing_value=np.nan)
    # This attribute name shadows the builtin ``float`` for the rest of the
    # class body; nothing below refers to the builtin.
    float = Column(float64_dtype, missing_value=np.nan)
    returnOnEquity = Column(float64_dtype, missing_value=np.nan)
    consensusEPS = Column(float64_dtype, missing_value=np.nan)
    numberOfEstimates = Column(float64_dtype, missing_value=np.nan)
    symbol = Column(object_dtype)
    EBITDA = Column(float64_dtype, missing_value=np.nan)
    revenue = Column(float64_dtype, missing_value=np.nan)
    grossProfit = Column(float64_dtype, missing_value=np.nan)
    cash = Column(float64_dtype, missing_value=np.nan)
    debt = Column(float64_dtype, missing_value=np.nan)
    ttmEPS = Column(float64_dtype, missing_value=np.nan)
    revenuePerShare = Column(float64_dtype, missing_value=np.nan)
    revenuePerEmployee = Column(float64_dtype, missing_value=np.nan)
    # NOTE(review): the sample record has "peRatioHigh" and "peRatioLow"
    # but no "peRatio" key -- confirm what populates this column.
    peRatio = Column(float64_dtype, missing_value=np.nan)
    EPSSurpriseDollar = Column(float64_dtype, missing_value=np.nan)
    EPSSurprisePercent = Column(float64_dtype, missing_value=np.nan)
    returnOnAssets = Column(float64_dtype, missing_value=np.nan)
    returnOnCapital = Column(float64_dtype, missing_value=np.nan)
    profitMargin = Column(float64_dtype, missing_value=np.nan)
    priceToSales = Column(float64_dtype, missing_value=np.nan)
    priceToBook = Column(float64_dtype, missing_value=np.nan)
    day200MovingAvg = Column(float64_dtype, missing_value=np.nan)
    day50MovingAvg = Column(float64_dtype, missing_value=np.nan)
    institutionPercent = Column(float64_dtype, missing_value=np.nan)
    insiderPercent = Column(float64_dtype, missing_value=np.nan)
    shortRatio = Column(float64_dtype, missing_value=np.nan)
    year5ChangePercent = Column(float64_dtype, missing_value=np.nan)
    year2ChangePercent = Column(float64_dtype, missing_value=np.nan)
    year1ChangePercent = Column(float64_dtype, missing_value=np.nan)
    ytdChangePercent = Column(float64_dtype, missing_value=np.nan)
    month6ChangePercent = Column(float64_dtype, missing_value=np.nan)
    month3ChangePercent = Column(float64_dtype, missing_value=np.nan)
    month1ChangePercent = Column(float64_dtype, missing_value=np.nan)
    day5ChangePercent = Column(float64_dtype, missing_value=np.nan)

    # One loader instance, created when the class body executes.
    _loader = IEXKeyStatsLoader()

    @classmethod
    def get_loader(cls):
        """Return the loader instance stored on the class."""
        return cls._loader
class PolygonCompany(DataSet):
    """Dataset of company attributes.

    Sample record kept from the original documentation (the description
    text is shortened here)::

        "logo": "https://s3.polygon.io/logos/aapl/logo.png",
        "exchange": "Nasdaq Global Select",
        "name": "Apple Inc.",
        "symbol": "AAPL",
        "listdate": "2018-08-15",
        "cik": "0000320193",
        "bloomberg": "EQ0010169500001000",
        "figi": "string",
        "lei": "HWUPKR0MPOU8FGXBT394",
        "sic": 3571,
        "country": "us",
        "industry": "Computer Hardware",
        "sector": "Technology",
        "marketcap": 815604985500,
        "employees": 116000,
        "phone": "(408) 996-1010",
        "ceo": "Tim Cook",
        "url": "http://www.apple.com",
        "description": "Apple Inc. designs, manufactures, and markets ..."

    The ``logo``, ``url`` and ``description`` keys of the sample have no
    column here, and the ``tags`` column has no key in the sample.
    """

    # Text columns use the empty string as the missing value, numeric
    # columns use NaN, and the date column uses 1970-01-01.
    exchange = Column(object_dtype, missing_value='')
    name = Column(object_dtype, missing_value='')
    symbol = Column(object_dtype, missing_value='')
    listdate = Column(
        datetime64D_dtype,
        missing_value=np.datetime64('1970-01-01'),
    )
    cik = Column(object_dtype, missing_value='')
    bloomberg = Column(object_dtype, missing_value='')
    figi = Column(object_dtype, missing_value='')
    lei = Column(object_dtype, missing_value='')
    sic = Column(float64_dtype, missing_value=np.nan)
    country = Column(object_dtype, missing_value='')
    industry = Column(object_dtype, missing_value='')
    sector = Column(object_dtype, missing_value='')
    marketcap = Column(float64_dtype, missing_value=np.nan)
    employees = Column(float64_dtype, missing_value=np.nan)
    phone = Column(object_dtype, missing_value='')
    ceo = Column(object_dtype, missing_value='')
    tags = Column(object_dtype, missing_value='')

    # One loader instance, created when the class body executes.
    _loader = PolygonCompanyLoader()

    @classmethod
    def get_loader(cls):
        """Return the loader instance stored on the class."""
        return cls._loader
class LargeDataSet(DataSet):
    """Dataset with one float column per lowercase ASCII letter."""

    # Columns are injected into the class namespace programmatically: one
    # separate Column instance per name in ``string.ascii_lowercase``.
    locals().update(
        (letter, Column(dtype=float)) for letter in string.ascii_lowercase
    )
class SomeDataSet(DataSet):
    """Small dataset with three columns and one plain class attribute."""

    # Columns with the default missing value for their dtype.
    a = Column(dtype=float)
    b = Column(dtype=object)

    # Integer column with an explicit missing value.
    c = Column(dtype=int, missing_value=-1)

    # A regular string attribute, deliberately not a Column.
    exists_but_not_a_column = "foo"
class IEXFinancials(DataSet):
    """Dataset declaring one float64 column per financial-report field.

    Sample record kept from the original documentation; its keys match
    the column names declared below::

        "reportDate": "2019-03-31",
        "grossProfit": 21648000000,
        "costOfRevenue": 36270000000,
        "operatingRevenue": 57918000000,
        "totalRevenue": 57918000000,
        "operatingIncome": 13242000000,
        "netIncome": 11561000000,
        "researchAndDevelopment": 3948000000,
        "operatingExpense": 44676000000,
        "currentAssets": 123346000000,
        "totalAssets": 341998000000,
        "totalLiabilities": 236138000000,
        "currentCash": 38329000000,
        "currentDebt": 22429000000,
        "shortTermDebt": 22429000000,
        "longTermDebt": 90201000000,
        "totalCash": 80433000000,
        "totalDebt": 112630000000,
        "shareholderEquity": 105860000000,
        "cashChange": -4954000000,
        "cashFlow": 11155000000

    Every column uses NaN as its missing value.
    """

    # NOTE(review): the sample record shows "reportDate" as a date string,
    # yet the column is declared float64 -- confirm against the loader.
    reportDate = Column(float64_dtype, missing_value=np.nan)

    grossProfit = Column(float64_dtype, missing_value=np.nan)
    costOfRevenue = Column(float64_dtype, missing_value=np.nan)
    operatingRevenue = Column(float64_dtype, missing_value=np.nan)
    totalRevenue = Column(float64_dtype, missing_value=np.nan)
    operatingIncome = Column(float64_dtype, missing_value=np.nan)
    netIncome = Column(float64_dtype, missing_value=np.nan)
    researchAndDevelopment = Column(float64_dtype, missing_value=np.nan)
    operatingExpense = Column(float64_dtype, missing_value=np.nan)
    currentAssets = Column(float64_dtype, missing_value=np.nan)
    totalAssets = Column(float64_dtype, missing_value=np.nan)
    totalLiabilities = Column(float64_dtype, missing_value=np.nan)
    currentCash = Column(float64_dtype, missing_value=np.nan)
    currentDebt = Column(float64_dtype, missing_value=np.nan)
    shortTermDebt = Column(float64_dtype, missing_value=np.nan)
    longTermDebt = Column(float64_dtype, missing_value=np.nan)
    totalCash = Column(float64_dtype, missing_value=np.nan)
    totalDebt = Column(float64_dtype, missing_value=np.nan)
    shareholderEquity = Column(float64_dtype, missing_value=np.nan)
    cashChange = Column(float64_dtype, missing_value=np.nan)
    cashFlow = Column(float64_dtype, missing_value=np.nan)

    # One loader instance, created when the class body executes.
    _loader = IEXFinancialsLoader()

    @classmethod
    def get_loader(cls):
        """Return the loader instance stored on the class."""
        return cls._loader
class LargeDataSet(DataSet):
    """Dataset with one float column per name yielded by
    ``chrange('a', 'z')``."""

    # Columns are injected into the class namespace programmatically; each
    # name gets its own Column instance.
    locals().update(
        (letter, Column(dtype=float)) for letter in chrange('a', 'z')
    )
class SomeDataSet(DataSet):
    """Small dataset with a float, an object and an int column."""

    # Columns with the default missing value for their dtype.
    a = Column(dtype=float)
    b = Column(dtype=object)

    # Integer column with an explicit missing value.
    c = Column(dtype=int, missing_value=-1)
from zipline.utils.numpy_utils import float64_dtype, categorical_dtype
from zipline.pipeline.domain import US_EQUITIES
from zipline.pipeline.data.dataset import Column, DataSet

# Dataset type built dynamically: one float64 Column per listed field name.
# The commented-out names are kept from the original field list.
MyDataSet = type(
    "MyDataset",
    (DataSet, ),
    {
        field: Column(float64_dtype)
        for field in [
            'open',
            'high',
            'low',
            'close',
            'volume',
            # 'ex_dividend',
            # 'split_ratio',
            # 'adj_open',
            # 'adj_high',
            # 'adj_low',
            # 'adj_close',
            # 'adj_volume'
        ]
    },
)

if __name__ == '__main__':
    # Running as a script only binds the dataset type to a second name.
    dat = MyDataSet