def _rows_from_json(values, schema):
    """Convert JSON row data to rows with appropriate types."""
    from google.cloud.bigquery import Row

    field_to_index = _field_to_index_mapping(schema)
    return [Row(_row_tuple_from_json(r, schema), field_to_index)
            for r in values]


def _item_to_row(iterator, resource):
    """Convert a JSON row to the native object.

    .. note::

        This assumes that the ``schema`` and ``_field_to_index``
        attributes have been added to the iterator after it was
        created, which is the caller's responsibility.

    :type iterator: :class:`~google.api_core.page_iterator.Iterator`
    :param iterator: The iterator that is currently in use.

    :type resource: dict
    :param resource: An item to be converted to a row.

    :rtype: :class:`~google.cloud.bigquery.table.Row`
    :returns: The next row in the page.
    """
    from google.cloud.bigquery import Row

    return Row(_row_tuple_from_json(resource, iterator.schema),
               iterator._field_to_index)
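
A minimal sketch of how _item_to_row is typically wired into a page iterator, assuming an authenticated BigQuery client, a schema of SchemaField objects, and the sibling helper _field_to_index_mapping; the resource path below is hypothetical:

from google.api_core import page_iterator

iterator = page_iterator.HTTPIterator(
    client=client,                 # assumed: an authenticated client
    api_request=client._connection.api_request,
    path="/projects/my-project/datasets/my_dataset/tables/my_table/data",
    item_to_value=_item_to_row,    # converts each raw JSON row to a Row
    items_key="rows",
)
# Per the note above, the caller attaches these before iteration begins:
iterator.schema = schema
iterator._field_to_index = _field_to_index_mapping(schema)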
Example #3
def _rows_from_json(values, schema):
    """Convert JSON row data to rows with appropriate types.

    Args:
        values (Sequence[Dict]): The list of responses (JSON rows) to convert.
        schema (Sequence[Union[ \
                :class:`~google.cloud.bigquery.schema.SchemaField`, \
                Mapping[str, Any] \
        ]]):
            The table's schema. If any item is a mapping, its content must be
            compatible with
            :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.

    Returns:
        List[:class:`~google.cloud.bigquery.Row`]
    """
    from google.cloud.bigquery import Row
    from google.cloud.bigquery.schema import _to_schema_fields

    schema = _to_schema_fields(schema)
    field_to_index = _field_to_index_mapping(schema)
    return [Row(_row_tuple_from_json(r, schema), field_to_index) for r in values]
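
A quick usage sketch: the BigQuery REST API returns each row as a dict with an "f" list of cells, each cell holding its value under "v"; the schema and data below are made up for illustration:

from google.cloud.bigquery.schema import SchemaField

schema = [
    SchemaField("name", "STRING"),
    SchemaField("age", "INTEGER"),
]
# Raw rows in the wire format the REST API uses.
values = [
    {"f": [{"v": "Ada"}, {"v": "36"}]},
    {"f": [{"v": "Grace"}, {"v": "42"}]},
]
rows = _rows_from_json(values, schema)
print(rows[0]["name"], rows[0].age)  # Ada 36 -- index or attribute access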
Example #4
    def test_computes_trending(self, db_request, with_purges):
        projects = [
            ProjectFactory.create(zscore=1 if not i else None)
            for i in range(3)
        ]

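        # These stubs mimic google.cloud.bigquery.Row, whose constructor
        # takes (values, field_to_index); here "zscore" sits at index 1.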
        results = iter([
            Row((projects[1].normalized_name, 2), {
                "project": 0,
                "zscore": 1
            }),
            Row((projects[2].normalized_name, -1), {
                "project": 0,
                "zscore": 1
            }),
        ])
        query = pretend.stub(
            result=pretend.call_recorder(lambda *a, **kw: results))
        bigquery = pretend.stub(query=pretend.call_recorder(lambda q: query))

        cacher = pretend.stub(purge=pretend.call_recorder(lambda keys: None))

        def find_service(iface=None, name=None):
            if iface is None and name == "gcloud.bigquery":
                return bigquery

            if with_purges and issubclass(iface, IOriginCache):
                return cacher

            raise LookupError

        db_request.find_service = find_service
        db_request.registry.settings = {
            "warehouse.trending_table": "example.pypi.downloads*"
        }

        compute_trending(db_request)

        assert bigquery.query.calls == [
            pretend.call(""" SELECT project,
                   IF(
                        STDDEV(downloads) > 0,
                        (todays_downloads - AVG(downloads))/STDDEV(downloads),
                        NULL
                    ) as zscore
            FROM (
                SELECT project,
                       date,
                       downloads,
                       FIRST_VALUE(downloads) OVER (
                            PARTITION BY project
                            ORDER BY DATE DESC
                            ROWS BETWEEN UNBOUNDED PRECEDING
                                AND UNBOUNDED FOLLOWING
                        ) as todays_downloads
                FROM (
                    SELECT file.project as project,
                           DATE(timestamp) AS date,
                           COUNT(*) as downloads
                    FROM `example.pypi.downloads*`
                    WHERE _TABLE_SUFFIX BETWEEN
                        FORMAT_DATE(
                            "%Y%m%d",
                            DATE_ADD(CURRENT_DATE(), INTERVAL -31 day))
                        AND
                        FORMAT_DATE(
                            "%Y%m%d",
                            DATE_ADD(CURRENT_DATE(), INTERVAL -1 day))
                    GROUP BY file.project, date
                )
            )
            GROUP BY project, todays_downloads
            HAVING SUM(downloads) >= 5000
            ORDER BY zscore DESC
        """)
        ]
        assert query.result.calls == [pretend.call()]
        assert cacher.purge.calls == ([pretend.call(["trending"])]
                                      if with_purges else [])

        results = dict(db_request.db.query(Project.name, Project.zscore).all())

        assert results == {
            projects[0].name: None,
            projects[1].name: 2,
            projects[2].name: -1,
        }
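
For reference, the per-project z-score the SQL above computes can be reproduced in plain Python; the daily counts here are made up, and BigQuery's STDDEV is the sample standard deviation (STDDEV_SAMP):

import statistics

# Hypothetical daily download counts over the window, most recent day first
# (what FIRST_VALUE ... ORDER BY date DESC picks as todays_downloads).
downloads = [120, 100, 95, 105, 98]
todays_downloads = downloads[0]

stdev = statistics.stdev(downloads)  # sample stddev, like BigQuery's STDDEV
zscore = (
    (todays_downloads - statistics.mean(downloads)) / stdev
    if stdev > 0
    else None
)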