def test_build_query_bad_end_date() -> None:
    """build_query raises ValueError when the end date precedes the start date."""
    # Arrange: the end ("-100") is before the start ("-1")
    package = "pycodestyle"
    fields = [PythonVersion]
    window_start, window_end = "-1", "-100"

    # Act / Assert
    with pytest.raises(ValueError):
        core.build_query(package, fields, window_start, window_end)
def test_build_query_no_project() -> None:
    """An empty project name yields a query without a project filter."""
    # Arrange
    # Equivalent CLI call: pypinfo -sd -2 -ed -1 -l 20 --all --test ''
    package = ""
    fields: List[Field] = []
    window_start, window_end = "-2", "-1"
    days = None
    row_limit = 20
    where_clause = None
    order_by = None
    pip_only = False
    # NOTE(review): whitespace inside the literal is kept exactly as found here;
    # confirm it matches build_query's real output layout.
    expected_sql = r""" SELECT COUNT(*) as download_count, FROM `bigquery-public-data.pypi.file_downloads` WHERE timestamp BETWEEN TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -2 DAY) AND TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 DAY) ORDER BY download_count DESC LIMIT 20 """.strip()  # noqa: E501

    # Act
    actual_sql = core.build_query(
        package,
        fields,
        window_start,
        window_end,
        days,
        row_limit,
        where_clause,
        order_by,
        pip_only,
    )

    # Assert
    assert actual_sql == expected_sql
def test_build_query_only_aggregate() -> None:
    """A lone aggregate field (Percent3) produces a query without GROUP BY."""
    # Arrange
    # Equivalent CLI call: pypinfo -sd -2 -ed -1 -l 20 numpy percent3
    package = "numpy"
    fields = [Percent3]
    window_start, window_end = "-2", "-1"
    days = None
    row_limit = 20
    where_clause = None
    order_by = None
    pip_only = True
    # NOTE(review): whitespace inside the literal is kept exactly as found here;
    # confirm it matches build_query's real output layout.
    expected_sql = r""" SELECT ROUND(100 * SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+)") = "3" THEN 1 ELSE 0 END) / COUNT(*), 1) as percent_3, COUNT(*) as download_count, FROM `bigquery-public-data.pypi.file_downloads` WHERE timestamp BETWEEN TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -2 DAY) AND TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 DAY) AND file.project = "numpy" AND details.installer.name = "pip" ORDER BY download_count DESC LIMIT 20 """.strip()  # noqa: E501

    # Act
    actual_sql = core.build_query(
        package,
        fields,
        window_start,
        window_end,
        days,
        row_limit,
        where_clause,
        order_by,
        pip_only,
    )

    # Assert
    assert actual_sql == expected_sql
def test_build_query_where() -> None:
    """A custom ``where`` expression is appended as an extra AND condition."""
    # Arrange
    # Equivalent CLI call:
    #   pypinfo -sd -2 -ed -1 --test --where 'file.filename LIKE "%manylinux%"' numpy file
    package = "numpy"
    fields = [File]
    window_start, window_end = "-2", "-1"
    days = None
    row_limit = 10
    where_clause = 'file.filename LIKE "%manylinux%"'
    order_by = None
    pip_only = True
    # NOTE(review): whitespace inside the literal is kept exactly as found here;
    # confirm it matches build_query's real output layout.
    expected_sql = r""" SELECT file.filename as file, COUNT(*) as download_count, FROM `bigquery-public-data.pypi.file_downloads` WHERE timestamp BETWEEN TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -2 DAY) AND TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 DAY) AND file.project = "numpy" AND details.installer.name = "pip" AND file.filename LIKE "%manylinux%" GROUP BY file ORDER BY download_count DESC LIMIT 10 """.strip()  # noqa: E501

    # Act
    actual_sql = core.build_query(
        package,
        fields,
        window_start,
        window_end,
        days,
        row_limit,
        where_clause,
        order_by,
        pip_only,
    )

    # Assert
    assert actual_sql == expected_sql
def test_build_query_days() -> None:
    """With ``days`` set and no explicit dates, the window is relative to now.

    For ``days=10`` the expected query spans INTERVAL -11 DAY to
    INTERVAL -1 DAY.
    """
    # Arrange: no start/end dates are supplied, only a day count
    package = "pycodestyle"
    fields = [PythonVersion]
    window_start = None
    window_end = None
    day_count = 10
    row_limit = 100
    where_clause = None
    order_by = None
    pip_only = True
    # NOTE(review): whitespace inside the literal is kept exactly as found here;
    # confirm it matches build_query's real output layout.
    expected_sql = r""" SELECT REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") as python_version, COUNT(*) as download_count, FROM `bigquery-public-data.pypi.file_downloads` WHERE timestamp BETWEEN TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -11 DAY) AND TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 DAY) AND file.project = "pycodestyle" AND details.installer.name = "pip" GROUP BY python_version ORDER BY download_count DESC LIMIT 100 """.strip()  # noqa: E501

    # Act
    actual_sql = core.build_query(
        package,
        fields,
        window_start,
        window_end,
        day_count,
        row_limit,
        where_clause,
        order_by,
        pip_only,
    )

    # Assert
    assert actual_sql == expected_sql
def test_build_query_specifier() -> None:
    """A ``name==version`` project adds a version regex filter to the query.

    ``"foo==1"`` is expected to filter on project ``"foo"`` plus a
    REGEXP_CONTAINS condition on ``file.version``.
    """
    # Arrange
    # Equivalent CLI call: pypinfo -sd -2 -ed -1 -l 20 --test 'foo==1'
    requirement = "foo==1"
    fields: List[Field] = []
    window_start, window_end = "-2", "-1"
    days = None
    row_limit = 20
    where_clause = None
    order_by = None
    pip_only = True
    # NOTE(review): whitespace inside the literal is kept exactly as found here;
    # confirm it matches build_query's real output layout.
    expected_sql = r""" SELECT COUNT(*) as download_count, FROM `bigquery-public-data.pypi.file_downloads` WHERE timestamp BETWEEN TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -2 DAY) AND TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 DAY) AND file.project = "foo" AND REGEXP_CONTAINS(file.version, r"(?i)^(0+!)?0*1(\.0+)*$") AND details.installer.name = "pip" ORDER BY download_count DESC LIMIT 20 """.strip()  # noqa: E501

    # Act
    actual_sql = core.build_query(
        requirement,
        fields,
        window_start,
        window_end,
        days,
        row_limit,
        where_clause,
        order_by,
        pip_only,
    )

    # Assert
    assert actual_sql == expected_sql
def _build_query(self):
    """Return the result of ``build_query`` for this instance's settings.

    The package comes from ``self.package`` and the fields from
    ``self.get_parsed_fields(self.header_fields)``. ``self.limit`` is passed
    through unchanged, ``self.days`` is converted with ``str``, and ``pip`` is
    the negation of ``self.all_installers``.
    """
    package = self.package
    parsed_fields = self.get_parsed_fields(self.header_fields)
    options = {
        "limit": self.limit,
        "days": str(self.days),
        "pip": not self.all_installers,
    }
    return build_query(package, parsed_fields, **options)
def pypinfo(
    ctx,
    project,
    fields,
    auth,
    run,
    json,
    timeout,
    limit,
    days,
    start_date,
    end_date,
    where,
    order,
):
    """Valid fields are:\n project | version | pyversion | percent3 | percent2 | impl | impl-version |\n openssl | date | month | year | country | installer | installer-version |\n setuptools-version | system | system-release | distro | distro-version | cpu """
    # NOTE(review): the help text above is kept character-for-character as found
    # here; confirm its line layout against the rendered --help output.

    # Credential setup is a standalone action: store, report, and stop.
    if auth:
        set_credentials(auth)
        click.echo('Credentials location set to "{}".'.format(get_credentials()))
        return

    # Nothing to query: show the usage text instead.
    if project is None and not fields:
        click.echo(ctx.get_help())
        return

    # Translate each requested field name through FIELD_MAP, rejecting unknown ones.
    selected_fields = []
    for field_name in fields:
        field_obj = FIELD_MAP.get(field_name)
        if field_obj is None:
            raise ValueError('"{}" is an unsupported field.'.format(field_name))
        selected_fields.append(field_obj)

    query_text = build_query(
        project,
        selected_fields,
        limit=limit,
        days=days,
        start_date=start_date,
        end_date=end_date,
        where=where,
        order=order,
    )

    # Without the run flag, only print the generated query.
    if not run:
        click.echo(query_text)
        return

    client = create_client(get_credentials())
    query = client.run_sync_query(query_text)
    query.timeout_ms = timeout
    query.run()

    rows = parse_query_result(query)
    click.echo(format_json(rows) if json else tabulate(rows))
def test_build_query():
    """build_query output for an absolute date range with a single field."""
    # Arrange
    # Data from pycodestyle in 2017-10
    # Equivalent CLI call:
    #   pypinfo -sd 2017-10-01 -ed 2017-10-31 -pc -l 100 --json pycodestyle pyversion
    package = "pycodestyle"
    fields = [PythonVersion]
    window_start, window_end = "2017-10-01", "2017-10-31"
    days = None
    row_limit = 100
    where_clause = None
    order_by = None
    pip_only = True
    # NOTE(review): whitespace inside the literal is kept exactly as found here;
    # confirm it matches build_query's real output layout.
    expected_sql = r""" SELECT REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") as python_version, COUNT(*) as download_count, FROM TABLE_DATE_RANGE( [the-psf:pypi.downloads], TIMESTAMP("2017-10-01 00:00:00"), TIMESTAMP("2017-10-31 23:59:59") ) WHERE file.project = "pycodestyle" AND details.installer.name = "pip" GROUP BY python_version, ORDER BY download_count DESC LIMIT 100 """.strip()  # noqa: E501

    # Act
    actual_sql = core.build_query(
        package,
        fields,
        window_start,
        window_end,
        days,
        row_limit,
        where_clause,
        order_by,
        pip_only,
    )

    # Assert
    assert actual_sql == expected_sql
def pypinfo(
    ctx,
    project,
    fields,
    auth,
    run,
    json,
    indent,
    timeout,
    limit,
    days,
    start_date,
    end_date,
    month,
    where,
    order,
    all_installers,
    percent,
    markdown,
    verbose,
):
    """Valid fields are:\n project | version | file | pyversion | percent3 | percent2 | impl | impl-version |\n openssl | date | month | year | country | installer | installer-version |\n setuptools-version | system | system-release | distro | distro-version | cpu """
    # NOTE(review): the help text above is kept character-for-character as found
    # here; confirm its line layout against the rendered --help output.

    # Credential setup is a standalone action: store, report, and stop.
    if auth:
        set_credentials(auth)
        click.echo('Credentials location set to "{}".'.format(get_credentials()))
        return

    if verbose:
        # Diagnostic output goes to stderr (err=True).
        click.echo('Credentials location set to "{}".'.format(get_credentials()), err=True)

    # Nothing to query: show the usage text instead.
    if project is None and not fields:
        click.echo(ctx.get_help())
        return

    # Translate each requested field name through FIELD_MAP, rejecting unknown ones.
    parsed_fields = []
    for field_name in fields:
        field_obj = FIELD_MAP.get(field_name)
        if field_obj is None:
            raise ValueError('"{}" is an unsupported field.'.format(field_name))
        parsed_fields.append(field_obj)

    # If the order value names a known field, order by that field's name and put
    # the field first in the selection; otherwise pass the raw value through.
    order_field = FIELD_MAP.get(order)
    if order_field:
        order_name = order_field.name
        parsed_fields.insert(0, order_field)
    else:
        order_name = order

    # A month selection overrides any explicit start/end dates.
    if month:
        start_date, end_date = month_ends(month)

    built_query = build_query(
        project,
        parsed_fields,
        limit=limit,
        days=days,
        start_date=start_date,
        end_date=end_date,
        where=where,
        order=order_name,
        pip=not all_installers,
    )

    # Without the run flag, only print the generated query.
    if not run:
        click.echo(built_query)
        return

    client = create_client(get_credentials())
    query_job = client.query(built_query, job_config=create_config())
    # NOTE(review): presumably `timeout` is in milliseconds and is reduced to
    # whole seconds here - confirm against the option definition.
    query_rows = query_job.result(timeout=timeout // 1000)

    # Cached
    from_cache = bool(query_job.cache_hit)

    # Processed (a missing/zero value is treated as 0 bytes)
    bytes_processed = query_job.total_bytes_processed or 0
    processed_amount, processed_unit = convert_units(bytes_processed)

    # Billed (a missing/zero value is treated as 0 bytes)
    bytes_billed = query_job.total_bytes_billed or 0
    billed_amount, billed_unit = convert_units(bytes_billed)

    # Cost: tier defaults to 1; the result is rounded up per TO_CENTS.
    billing_tier = query_job.billing_tier or 1
    raw_cost = Decimal(TIER_COST * billing_tier) / TB * Decimal(bytes_billed)
    estimated_cost = str(raw_cost.quantize(TO_CENTS, rounding=ROUND_UP))

    table_output = not json
    rows = parse_query_result(query_job, query_rows)

    # Only headers returned
    if len(rows) == 1 and table_output:
        click.echo("No data returned, check project name")
        return

    if percent:
        rows = add_percentages(rows, include_sign=table_output)

    # Only for tables, and if more than the header row + a single data row
    if len(rows) > 2 and table_output:
        rows = add_download_total(rows)

    if json:
        query_info = {
            'cached': from_cache,
            'bytes_processed': bytes_processed,
            'bytes_billed': bytes_billed,
            'estimated_cost': estimated_cost,
        }
        click.echo(format_json(rows, query_info, indent))
        return

    # Table output: job statistics first, then a blank line, then the table.
    click.echo('Served from cache: {}'.format(from_cache))
    click.echo('Data processed: {:.2f} {}'.format(processed_amount, processed_unit))
    click.echo('Data billed: {:.2f} {}'.format(billed_amount, billed_unit))
    click.echo('Estimated cost: ${}'.format(estimated_cost))
    click.echo()
    click.echo(tabulate(rows, markdown))
def test_build_query_bad_project_marker() -> None:
    """A project string with a ``; sys_platform == ...`` suffix is rejected.

    The ValueError message must match ``.*marker.*``.
    """
    requirement = 'foo ; sys_platform == "win32"'
    no_fields = []

    with pytest.raises(ValueError, match=".*marker.*"):
        core.build_query(requirement, no_fields)
def test_build_query_bad_project_url() -> None:
    """A project string of the form ``name@url`` is rejected.

    The ValueError message must match ``.*url.*``.
    """
    requirement = 'foo@https://foo.bar/'
    no_fields = []

    with pytest.raises(ValueError, match=".*url.*"):
        core.build_query(requirement, no_fields)
def test_build_query_bad_project_extras() -> None:
    """A project string with a bracketed ``[bar]`` suffix is rejected.

    The ValueError message must match ``.*extras.*``.
    """
    requirement = 'foo[bar]'
    no_fields = []

    with pytest.raises(ValueError, match=".*extras.*"):
        core.build_query(requirement, no_fields)