Example #1
0
class IEXEarnings(DataSet):
    '''
    Columns for the fields of an IEX earnings record.

    Sample record (its keys are the column names declared below):

        "actualEPS": 2.46,
        "consensusEPS": 2.36,
        "announceTime": "AMC",
        "numberOfEstimates": 34,
        "EPSSurpriseDollar": 0.1,
        "EPSReportDate": "2019-04-30",
        "fiscalPeriod": "Q1 2019",
        "fiscalEndDate": "2019-03-31",
        "yearAgo": 2.73,
        "yearAgoChangePercent": -0.0989
    '''

    # String-valued fields; '' is the missing value.
    announceTime = Column(object_dtype, missing_value='')
    fiscalPeriod = Column(object_dtype, missing_value='')
    # Date fields; 1970-01-01 is used as the missing-value sentinel.
    EPSReportDate = Column(datetime64ns_dtype,
                           missing_value=np.datetime64('1970-01-01'))
    fiscalEndDate = Column(datetime64ns_dtype,
                           missing_value=np.datetime64('1970-01-01'))
    # Numeric fields (including the integer-looking numberOfEstimates) are
    # all declared float64 with NaN as the missing value.
    actualEPS = Column(float64_dtype, missing_value=np.nan)
    consensusEPS = Column(float64_dtype, missing_value=np.nan)
    numberOfEstimates = Column(float64_dtype, missing_value=np.nan)
    EPSSurpriseDollar = Column(float64_dtype, missing_value=np.nan)
    yearAgo = Column(float64_dtype, missing_value=np.nan)
    yearAgoChangePercent = Column(float64_dtype, missing_value=np.nan)

    # Instantiated once, when the class body executes; get_loader() hands
    # out this same instance on every call.
    _loader = IEXEarningsLoader()

    @classmethod
    def get_loader(cls):
        """Return the loader instance stored on the class."""
        return cls._loader
Example #2
0
    def infer_dataset(data_path, fields=None):
        """
        Build a ``DataSet`` type from the dtypes recorded in an HDF5 store.

        Parameters
        ----------
        data_path : str
            Path handed to ``pd.HDFStore``. The store must contain a
            ``'/dtypes'`` entry mapping column name -> dtype.
        fields : container of str, optional
            When given, only the names found in ``fields`` become columns.
            ``None`` (the default) keeps every name in ``'/dtypes'``.

        Returns
        -------
        type
            A new ``DataSet`` subclass named ``"Dataset"`` with one
            ``Column`` per selected name.
        """
        # The dtypes entry is read eagerly, so the store only needs to stay
        # open for this single lookup.
        with pd.HDFStore(data_path) as store:
            dtypes = store['/dtypes']

        columns = {}
        # ``Series.items`` replaces ``Series.iteritems``, which was removed
        # in pandas 2.0.
        for name, dtype in dtypes.items():
            if fields is not None and name not in fields:
                continue
            if dtype == 'O':
                # Object columns get an explicit 'NA' missing value.
                columns[name] = Column(dtype, missing_value='NA')
            else:
                columns[name] = Column(dtype)
        return type("Dataset", (DataSet, ), columns)
Example #3
0
class USEquityPricing(DataSet):
    """
    Dataset representing daily trading prices and volumes.
    """
    # Every field, volume included, is declared as a float64 column with no
    # explicit missing_value.
    open = Column(float64_dtype)
    high = Column(float64_dtype)
    low = Column(float64_dtype)
    close = Column(float64_dtype)
    volume = Column(float64_dtype)

    @staticmethod
    def get_loader():
        """Return the module-level ``_loader`` (defined outside this class)."""
        return _loader
Example #4
0
def new_dataset(expr, missing_values, domain):
    """
    Build a ``DataSet`` subclass whose columns mirror a blaze expression.

    Parameters
    ----------
    expr : Expr
        The blaze expression representing the values.
    missing_values : frozenset of (name, value) pairs
        Pairs associating a column name with the missing_value for that
        column.

        This needs to be a frozenset rather than a dict or tuple of tuples
        because we want a collection that's unordered but still hashable.
    domain : zipline.pipeline.domain.Domain
        Domain of the dataset to be created.

    Returns
    -------
    ds : type
        A new dataset type.

    Raises
    ------
    ValueError
        If the expression has a field named 'domain', which is reserved.

    Notes
    -----
    This function is memoized (the memoization is applied outside this
    block). Repeated calls with the same inputs will return the same type.
    """
    fill_values = dict(missing_values)

    # Expressions carrying a sid field produce 2-dimensional datasets; all
    # others are 1-dimensional.
    if SID_FIELD_NAME in expr.fields:
        namespace = {'ndim': 2}
    else:
        namespace = {'ndim': 1}

    # sid and timestamp never become columns: they are implicitly the labels
    # of the arrays that will be passed to pipeline Terms.
    label_fields = (SID_FIELD_NAME, TS_FIELD_NAME)
    for field, measure in expr.dshape.measure.fields:
        if field in label_fields:
            continue
        np_type = datashape_type_to_numpy(measure)
        if not can_represent_dtype(np_type):
            namespace[field] = NonPipelineField(field, np_type)
            continue
        namespace[field] = Column(
            np_type,
            fill_values.get(field, NotSpecified),
        )

    if 'domain' in namespace:
        raise ValueError("Got a column named 'domain' in new_dataset(). "
                         "'domain' is reserved.")
    namespace['domain'] = domain

    # Unnamed expressions receive the next generated name.
    type_name = expr._name
    if type_name is None:
        type_name = next(_new_names)

    # unicode is a name error in py3 but the branch is only hit
    # when we are in python 2.
    if PY2 and isinstance(type_name, unicode):  # pragma: no cover # noqa
        type_name = type_name.encode('utf-8')

    return type(type_name, (DataSet, ), namespace)
Example #5
0
    def test_construction_error_message(self):
        # A currency-aware column with a non-float64 dtype must be rejected
        # with exactly this message.
        expected_message = (
            'Columns cannot be constructed with currency_aware=True, '
            'dtype=datetime64[ns]. Currency aware columns must have a float64 '
            'dtype.'
        )

        with self.assertRaises(ValueError) as caught:
            Column(dtype=datetime64ns_dtype, currency_aware=True)

        self.assertEqual(str(caught.exception), expected_message)
Example #6
0
def new_dataset(expr, deltas, missing_values):
    """
    Build a ``DataSet`` subclass from a pair of blaze expressions.

    Parameters
    ----------
    expr : Expr
        The blaze expression representing the first known values.
    deltas : Expr
        The blaze expression representing the deltas to the data. It is not
        read by this function body.
    missing_values : frozenset of (name, value) pairs
        Pairs associating a column name with the missing_value for that
        column.

        This needs to be a frozenset rather than a dict or tuple of tuples
        because we want a collection that's unordered but still hashable.

    Returns
    -------
    ds : type
        A new dataset type.

    Notes
    -----
    This function is memoized (the memoization is applied outside this
    block). Repeated calls with the same inputs will return the same type.
    """
    fill_values = dict(missing_values)
    namespace = {}

    # sid and timestamp never become columns: they are implicitly the labels
    # of the arrays that will be passed to pipeline Terms.
    label_fields = (SID_FIELD_NAME, TS_FIELD_NAME)
    for field, measure in expr.dshape.measure.fields:
        if field in label_fields:
            continue
        np_type = datashape_type_to_numpy(measure)
        if not can_represent_dtype(np_type):
            namespace[field] = NonPipelineField(field, np_type)
            continue
        namespace[field] = Column(
            np_type,
            fill_values.get(field, NotSpecified),
        )

    # Unnamed expressions receive the next generated name.
    type_name = expr._name
    if type_name is None:
        type_name = next(_new_names)

    # unicode is a name error in py3 but the branch is only hit
    # when we are in python 2.
    if PY2 and isinstance(type_name, unicode):  # noqa
        type_name = type_name.encode('utf-8')

    return type(type_name, (DataSet, ), namespace)
Example #7
0
class IEXCompany(DataSet):
    """
    Columns for the fields of an IEX company record.

    Every active column is an object column whose missing value is ''.
    """

    symbol = Column(object_dtype, missing_value='')
    companyName = Column(object_dtype, missing_value='')
    exchange = Column(object_dtype, missing_value='')
    industry = Column(object_dtype, missing_value='')
    website = Column(object_dtype, missing_value='')
    description = Column(object_dtype, missing_value='')
    CEO = Column(object_dtype, missing_value='')
    issueType = Column(object_dtype, missing_value='')
    sector = Column(object_dtype, missing_value='')
    # The 'tags' column is currently disabled:
    # tags = Column(object_dtype, missing_value='')

    # Instantiated once, when the class body executes; get_loader() hands
    # out this same instance on every call.
    _loader = IEXCompanyLoader()

    @classmethod
    def get_loader(cls):
        """Return the loader instance stored on the class."""
        return cls._loader
Example #8
0
class FundamentalsDataSet(DataSet):
    """
    :class:`~zipline.pipeline.data.DataSet` declaring fundamentals columns.

    The previous docstring described daily prices and volumes, which does not
    match the columns declared here.
    """
    ROEAVE3 = Column(float64_dtype)
    total_share_0QE = Column(float64_dtype)
    # NOTE(review): the date-named fields below are declared float64 --
    # presumably numerically encoded dates; confirm against the loader.
    ipo_date = Column(float64_dtype)
    delist_date = Column(float64_dtype)
    IndustryId = Column(float64_dtype)
    # NOTE(review): `datetime` is not one of the *_dtype constants used by
    # the other columns -- confirm it resolves to a dtype Column accepts.
    ipo_date_test = Column(datetime)
    industry_id = Column(object_dtype)
Example #9
0
class EquityPricing(DataSet):
    """
    :class:`~zipline.pipeline.data.DataSet` containing daily trading prices and
    volumes.
    """
    # The four price fields are float64 columns flagged currency_aware=True.
    open = Column(float64_dtype, currency_aware=True)
    high = Column(float64_dtype, currency_aware=True)
    low = Column(float64_dtype, currency_aware=True)
    close = Column(float64_dtype, currency_aware=True)
    # volume is float64 as well, but is not flagged currency-aware.
    volume = Column(float64_dtype)
    # Categorical column named 'currency'.
    currency = Column(categorical_dtype)
Example #10
0
def new_dataset(expr, deltas):
    """Build a ``DataSet`` subclass from a pair of blaze expressions.

    Each field of ``expr`` becomes a ``Column`` when its type promotes to
    float64, a ``NonPipelineField`` when it does not, and a ``NonNumpyField``
    when the promotion check raises ``TypeError``.

    Parameters
    ----------
    expr : Expr
       The blaze expression representing the first known values.
    deltas : Expr
       The blaze expression representing the deltas to the data. It is not
       read by this function body.

    Returns
    -------
    ds : type
        A new dataset type.

    Notes
    -----
    This function is memoized (the memoization is applied outside this
    block). Repeated calls with the same inputs will return the same type.
    """
    namespace = {}
    for field, measure in expr.dshape.measure.fields:
        try:
            if promote(measure, float64, promote_option=False) != float64:
                raise NotPipelineCompatible()
            # Optional types are unwrapped to their underlying type before
            # conversion to a numpy dtype.
            unwrapped = measure.ty if isinstance(measure, Option) else measure
        except NotPipelineCompatible:
            namespace[field] = NonPipelineField(field, measure)
            continue
        except TypeError:
            namespace[field] = NonNumpyField(field, measure)
            continue

        # Conversion happens outside the try block, so errors raised by it
        # propagate to the caller.
        namespace[field] = Column(unwrapped.to_numpy_dtype())

    # Unnamed expressions receive the next generated name.
    type_name = expr._name
    if type_name is None:
        type_name = next(_new_names)

    # unicode is a name error in py3 but the branch is only hit
    # when we are in python 2.
    if PY2 and isinstance(type_name, unicode):  # noqa
        type_name = type_name.encode('utf-8')

    return type(type_name, (DataSet, ), namespace)
Example #11
0
class IEXKeyStats(DataSet):
    '''
    Columns for the fields of an IEX key-stats record.

    Sample record:

      "companyName": "Apple Inc.",
      "marketcap": 760334287200,
      "beta": 1.295227,
      "week52high": 156.65,
      "week52low": 93.63,
      "week52change": 58.801903,
      "shortInterest": 55544287,
      "shortDate": "2017-06-15",
      "dividendRate": 2.52,
      "dividendYield": 1.7280395,
      "exDividendDate": "2017-05-11 00:00:00.0",
      "latestEPS": 8.29,
      "latestEPSDate": "2016-09-30",
      "sharesOutstanding": 5213840000,
      "float": 5203997571,
      "returnOnEquity": 0.08772939519857577,
      "consensusEPS": 3.22,
      "numberOfEstimates": 15,
      "symbol": "AAPL",
      "EBITDA": 73828000000,
      "revenue": 220457000000,
      "grossProfit": 84686000000,
      "cash": 256464000000,
      "debt": 358038000000,
      "ttmEPS": 8.55,
      "revenuePerShare": 42.2830389885382,
      "revenuePerEmployee": 1900491.3793103448,
      "peRatioHigh": 25.5,
      "peRatioLow": 8.7,
      "EPSSurpriseDollar": null,
      "EPSSurprisePercent": 3.9604,
      "returnOnAssets": 14.15,
      "returnOnCapital": null,
      "profitMargin": 20.73,
      "priceToSales": 3.6668503,
      "priceToBook": 6.19,
      "day200MovingAvg": 140.60541,
      "day50MovingAvg": 156.49678,
      "institutionPercent": 32.1,
      "insiderPercent": null,
      "shortRatio": 1.6915414,
      "year5ChangePercent": 0.5902546932200027,
      "year2ChangePercent": 0.3777449874142869,
      "year1ChangePercent": 0.39751716851558366,
      "ytdChangePercent": 0.36659492036160124,
      "month6ChangePercent": 0.12208398133748043,
      "month3ChangePercent": 0.08466584665846649,
      "month1ChangePercent": 0.009668596145283263,
      "day5ChangePercent": -0.005762605699968781
    '''

    # Conventions used below: numeric fields are float64 with NaN as the
    # missing value; date fields are datetime64[ns] with 1970-01-01 as the
    # missing-value sentinel.

    # NOTE(review): companyName and symbol are object columns declared
    # without an explicit missing_value -- confirm that is intended.
    companyName = Column(object_dtype)
    marketcap = Column(float64_dtype, missing_value=np.nan)
    beta = Column(float64_dtype, missing_value=np.nan)
    week52high = Column(float64_dtype, missing_value=np.nan)
    week52low = Column(float64_dtype, missing_value=np.nan)
    week52change = Column(float64_dtype, missing_value=np.nan)
    shortInterest = Column(float64_dtype, missing_value=np.nan)
    shortDate = Column(datetime64ns_dtype,
                       missing_value=np.datetime64('1970-01-01'))
    dividendRate = Column(float64_dtype, missing_value=np.nan)
    dividendYield = Column(float64_dtype, missing_value=np.nan)
    exDividendDate = Column(datetime64ns_dtype,
                            missing_value=np.datetime64('1970-01-01'))
    latestEPS = Column(float64_dtype, missing_value=np.nan)
    latestEPSDate = Column(datetime64ns_dtype,
                           missing_value=np.datetime64('1970-01-01'))
    sharesOutstanding = Column(float64_dtype, missing_value=np.nan)
    # Named after the "float" key of the record; this rebinds the builtin
    # name `float` for the remainder of this class body only.
    float = Column(float64_dtype, missing_value=np.nan)
    returnOnEquity = Column(float64_dtype, missing_value=np.nan)
    consensusEPS = Column(float64_dtype, missing_value=np.nan)
    numberOfEstimates = Column(float64_dtype, missing_value=np.nan)
    symbol = Column(object_dtype)
    EBITDA = Column(float64_dtype, missing_value=np.nan)
    revenue = Column(float64_dtype, missing_value=np.nan)
    grossProfit = Column(float64_dtype, missing_value=np.nan)
    cash = Column(float64_dtype, missing_value=np.nan)
    debt = Column(float64_dtype, missing_value=np.nan)
    ttmEPS = Column(float64_dtype, missing_value=np.nan)
    revenuePerShare = Column(float64_dtype, missing_value=np.nan)
    revenuePerEmployee = Column(float64_dtype, missing_value=np.nan)
    # NOTE(review): the sample record has "peRatioHigh"/"peRatioLow" but no
    # "peRatio" key -- confirm which key the loader actually supplies.
    peRatio = Column(float64_dtype, missing_value=np.nan)
    EPSSurpriseDollar = Column(float64_dtype, missing_value=np.nan)
    EPSSurprisePercent = Column(float64_dtype, missing_value=np.nan)
    returnOnAssets = Column(float64_dtype, missing_value=np.nan)
    returnOnCapital = Column(float64_dtype, missing_value=np.nan)
    profitMargin = Column(float64_dtype, missing_value=np.nan)
    priceToSales = Column(float64_dtype, missing_value=np.nan)
    priceToBook = Column(float64_dtype, missing_value=np.nan)
    day200MovingAvg = Column(float64_dtype, missing_value=np.nan)
    day50MovingAvg = Column(float64_dtype, missing_value=np.nan)
    institutionPercent = Column(float64_dtype, missing_value=np.nan)
    insiderPercent = Column(float64_dtype, missing_value=np.nan)
    shortRatio = Column(float64_dtype, missing_value=np.nan)
    year5ChangePercent = Column(float64_dtype, missing_value=np.nan)
    year2ChangePercent = Column(float64_dtype, missing_value=np.nan)
    year1ChangePercent = Column(float64_dtype, missing_value=np.nan)
    ytdChangePercent = Column(float64_dtype, missing_value=np.nan)
    month6ChangePercent = Column(float64_dtype, missing_value=np.nan)
    month3ChangePercent = Column(float64_dtype, missing_value=np.nan)
    month1ChangePercent = Column(float64_dtype, missing_value=np.nan)
    day5ChangePercent = Column(float64_dtype, missing_value=np.nan)

    # Instantiated once, when the class body executes; get_loader() hands
    # out this same instance on every call.
    _loader = IEXKeyStatsLoader()

    @classmethod
    def get_loader(cls):
        """Return the loader instance stored on the class."""
        return cls._loader
Example #12
0
class PolygonCompany(DataSet):

    '''
    Columns for the fields of a Polygon company record.

    Sample record:

    "logo": "https://s3.polygon.io/logos/aapl/logo.png",
    "exchange": "Nasdaq Global Select",
    "name": "Apple Inc.",
    "symbol": "AAPL",
    "listdate": "2018-08-15",
    "cik": "0000320193",
    "bloomberg": "EQ0010169500001000",
    "figi": "string",
    "lei": "HWUPKR0MPOU8FGXBT394",
    "sic": 3571,
    "country": "us",
    "industry": "Computer Hardware",
    "sector": "Technology",
    "marketcap": 815604985500,
    "employees": 116000,
    "phone": "(408) 996-1010",
    "ceo": "Tim Cook",
    "url": "http://www.apple.com",
    "description": "Apple Inc. designs, manufactures, and markets mobile communication and media devices, personal computers, and portable digital music players to consumers...\n",

    The "logo", "url" and "description" keys of the sample have no column
    here; the `tags` column has no counterpart in the sample.
    '''  # noqa

    # String fields are object columns with '' as the missing value; numeric
    # fields (sic, marketcap, employees) are float64 with NaN.
    exchange = Column(object_dtype, missing_value='')
    name = Column(object_dtype, missing_value='')
    symbol = Column(object_dtype, missing_value='')
    # Declared with datetime64D_dtype; 1970-01-01 is the missing-value
    # sentinel.
    listdate = Column(datetime64D_dtype,
                      missing_value=np.datetime64('1970-01-01'))
    cik = Column(object_dtype, missing_value='')
    bloomberg = Column(object_dtype, missing_value='')
    figi = Column(object_dtype, missing_value='')
    lei = Column(object_dtype, missing_value='')
    sic = Column(float64_dtype, missing_value=np.nan)
    country = Column(object_dtype, missing_value='')
    industry = Column(object_dtype, missing_value='')
    sector = Column(object_dtype, missing_value='')
    marketcap = Column(float64_dtype, missing_value=np.nan)
    employees = Column(float64_dtype, missing_value=np.nan)
    phone = Column(object_dtype, missing_value='')
    ceo = Column(object_dtype, missing_value='')
    # NOTE(review): "tags" does not appear in the sample record above --
    # confirm the loader provides it.
    tags = Column(object_dtype, missing_value='')

    # Instantiated once, when the class body executes; get_loader() hands
    # out this same instance on every call.
    _loader = PolygonCompanyLoader()

    @classmethod
    def get_loader(cls):
        """Return the loader instance stored on the class."""
        return cls._loader
Example #13
0
class LargeDataSet(DataSet):
    # One float column per lowercase ASCII letter, written directly into the
    # class namespace so that the columns need not be spelled out by hand.
    locals().update(
        (letter, Column(dtype=float))
        for letter in string.ascii_lowercase
    )
Example #14
0
class SomeDataSet(DataSet):
    # Three columns of differing dtypes; only `c` sets an explicit
    # missing_value (-1).
    a = Column(dtype=float)
    b = Column(dtype=object)
    c = Column(dtype=int, missing_value=-1)

    # A plain string attribute that is deliberately not a Column.
    exists_but_not_a_column = "foo"
Example #15
0
class IEXFinancials(DataSet):
    '''
    Columns for the fields of an IEX financials record.

    Sample record (its keys are the column names declared below):

        "reportDate": "2019-03-31",
        "grossProfit": 21648000000,
        "costOfRevenue": 36270000000,
        "operatingRevenue": 57918000000,
        "totalRevenue": 57918000000,
        "operatingIncome": 13242000000,
        "netIncome": 11561000000,
        "researchAndDevelopment": 3948000000,
        "operatingExpense": 44676000000,
        "currentAssets": 123346000000,
        "totalAssets": 341998000000,
        "totalLiabilities": 236138000000,
        "currentCash": 38329000000,
        "currentDebt": 22429000000,
        "shortTermDebt": 22429000000,
        "longTermDebt": 90201000000,
        "totalCash": 80433000000,
        "totalDebt": 112630000000,
        "shareholderEquity": 105860000000,
        "cashChange": -4954000000,
        "cashFlow": 11155000000
    '''

    # Every column is float64 with NaN as the missing value.
    # NOTE(review): the sample shows "reportDate" as a date string, yet it is
    # declared float64 like the numeric fields -- confirm the loader converts
    # it, or whether a datetime column was intended.
    reportDate = Column(float64_dtype, missing_value=np.nan)
    grossProfit = Column(float64_dtype, missing_value=np.nan)
    costOfRevenue = Column(float64_dtype, missing_value=np.nan)
    operatingRevenue = Column(float64_dtype, missing_value=np.nan)
    totalRevenue = Column(float64_dtype, missing_value=np.nan)
    operatingIncome = Column(float64_dtype, missing_value=np.nan)
    netIncome = Column(float64_dtype, missing_value=np.nan)
    researchAndDevelopment = Column(float64_dtype, missing_value=np.nan)
    operatingExpense = Column(float64_dtype, missing_value=np.nan)
    currentAssets = Column(float64_dtype, missing_value=np.nan)
    totalAssets = Column(float64_dtype, missing_value=np.nan)
    totalLiabilities = Column(float64_dtype, missing_value=np.nan)
    currentCash = Column(float64_dtype, missing_value=np.nan)
    currentDebt = Column(float64_dtype, missing_value=np.nan)
    shortTermDebt = Column(float64_dtype, missing_value=np.nan)
    longTermDebt = Column(float64_dtype, missing_value=np.nan)
    totalCash = Column(float64_dtype, missing_value=np.nan)
    totalDebt = Column(float64_dtype, missing_value=np.nan)
    shareholderEquity = Column(float64_dtype, missing_value=np.nan)
    cashChange = Column(float64_dtype, missing_value=np.nan)
    cashFlow = Column(float64_dtype, missing_value=np.nan)

    # Instantiated once, when the class body executes; get_loader() hands
    # out this same instance on every call.
    _loader = IEXFinancialsLoader()

    @classmethod
    def get_loader(cls):
        """Return the loader instance stored on the class."""
        return cls._loader
Example #16
0
class LargeDataSet(DataSet):
    # One float column per name yielded by chrange('a', 'z'), written
    # directly into the class namespace so that the columns need not be
    # spelled out by hand.
    locals().update(
        (letter, Column(dtype=float))
        for letter in chrange('a', 'z')
    )
Example #17
0
class SomeDataSet(DataSet):
    # Three columns of differing dtypes; only `c` sets an explicit
    # missing_value (-1).
    a = Column(dtype=float)
    b = Column(dtype=object)
    c = Column(dtype=int, missing_value=-1)
Example #18
0
from zipline.utils.numpy_utils import float64_dtype, categorical_dtype

from zipline.pipeline.domain import US_EQUITIES
from zipline.pipeline.data.dataset import Column, DataSet

# Dataset type (class name "MyDataset") with one float64 column per field
# listed below.
MyDataSet = type(
    "MyDataset",
    (DataSet, ),
    dict(
        (field, Column(float64_dtype))
        for field in (
            'open',
            'high',
            'low',
            'close',
            'volume',
            # Fields currently left out of the dataset:
            # 'ex_dividend',
            # 'split_ratio',
            # 'adj_open',
            # 'adj_high',
            # 'adj_low',
            # 'adj_close',
            # 'adj_volume'
        )
    ),
)

if __name__ == '__main__':
    # Running the module as a script only binds the generated type to `dat`.
    dat = MyDataSet