def main():
    """Publish UDFs."""
    args = parser.parse_args()

    client = bigquery.Client(args.project_id)

    if args.dependency_dir:
        push_dependencies_to_gcs(args.gcs_bucket, args.gcs_path,
                                 args.dependency_dir, args.project_id)

    raw_udfs = read_udf_dirs(args.udf_dir)

    published_udfs = []

    for raw_udf in raw_udfs:
        # gather all dependencies for the UDF and publish them as
        # persistent UDFs (a sketch of accumulate_dependencies follows below)
        udfs_to_publish = accumulate_dependencies([], raw_udfs, raw_udf)
        udfs_to_publish.append(raw_udf)

        for dep in udfs_to_publish:
            if dep not in published_udfs:
                publish_udf(
                    raw_udfs[dep],
                    client,
                    args.project_id,
                    args.gcs_bucket,
                    args.gcs_path,
                    raw_udfs.keys(),
                )
                published_udfs.append(dep)
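
# accumulate_dependencies() is not shown in this example; a minimal sketch,
# assuming each parsed UDF lists the names of the UDFs it references in a
# `dependencies` attribute (hypothetical attribute name; the real parser may
# differ). Dependencies are collected depth-first so that transitive
# dependencies come before their dependents and get published first.
def accumulate_dependencies(deps, raw_udfs, udf_name):
    for dep_name in raw_udfs[udf_name].dependencies:
        if dep_name in raw_udfs and dep_name not in deps:
            # recurse first so transitive dependencies land earliest
            accumulate_dependencies(deps, raw_udfs, dep_name)
            deps.append(dep_name)
    return deps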
Example #2
def main():
    """Validate SQL examples."""
    args = parser.parse_args()

    # parse UDFs
    parsed_udfs = read_udf_dirs(*args.project_dirs)

    for project_dir in args.project_dirs:
        if os.path.isdir(project_dir):
            for root, dirs, files in os.walk(project_dir):
                if os.path.basename(root) == EXAMPLE_DIR:
                    for file in files:
                        dry_run_sql = sql_for_dry_run(os.path.join(root, file),
                                                      parsed_udfs, project_dir)

                        # store sql in temporary file for dry_run
                        tmp_dir = Path(tempfile.mkdtemp()) / Path(root)
                        tmp_dir.mkdir(parents=True, exist_ok=True)
                        tmp_example_file = tmp_dir / file
                        tmp_example_file.write_text(dry_run_sql)

                        dry_run_sql_file(str(tmp_example_file))
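
# dry_run_sql_file() is not shown in this example; a minimal sketch, assuming
# it validates the SQL with a BigQuery dry-run query (a dry run checks syntax
# and referenced objects without reading any data or billing any bytes):
def dry_run_sql_file(sql_file):
    client = bigquery.Client()
    job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
    with open(sql_file) as f:
        client.query(f.read(), job_config=job_config)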
Example #3
QueryParameter = Union[
    bigquery.ArrayQueryParameter,
    bigquery.ScalarQueryParameter,
    bigquery.StructQueryParameter,
]
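
# For illustration, the three parameter types in the union above are
# constructed like this (parameter names and values here are made up):
example_parameters = [
    bigquery.ScalarQueryParameter("submission_date", "DATE", "2021-01-01"),
    bigquery.ArrayQueryParameter("channels", "STRING", ["release", "beta"]),
    bigquery.StructQueryParameter(
        "user", bigquery.ScalarQueryParameter("id", "INT64", 1)
    ),
]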

table_extensions = {
    "ndjson": bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
    "csv": bigquery.SourceFormat.CSV,
    "backup_info": bigquery.SourceFormat.DATASTORE_BACKUP,
    "export_metadata": bigquery.SourceFormat.DATASTORE_BACKUP,
    "avro": bigquery.SourceFormat.AVRO,
    "parquet": bigquery.SourceFormat.PARQUET,
    "orc": bigquery.SourceFormat.ORC,
}
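
# The mapping above translates a data file's extension into a BigQuery load
# format; a minimal sketch of how it might be applied (the helper name is
# hypothetical, bigquery.LoadJobConfig is the real client API):
def load_job_config_for(path):
    extension = path.rsplit(".", 1)[-1]
    return bigquery.LoadJobConfig(source_format=table_extensions[extension])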

raw_udfs = parse_udf.read_udf_dirs()


@dataclass
class Table:
    """Define info needed to create a table for a generated test."""

    name: str
    source_format: str
    # a tuple means read via `load(*source_path)` and format as source_format
    # a string means source_path is already in source_format
    source_path: Union[str, Tuple[str, str]]
    # post_init fields
    schema: Optional[List[bigquery.SchemaField]] = None

    def __post_init__(self):
        """Fill in calculated fields."""
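
# Example construction (hypothetical table name and path), using the
# plain-string source_path form, i.e. a file already in the given
# source_format:
events_table = Table("events", "ndjson", "tests/data/events.ndjson")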
Example #4
def udfs():
    """Get all udfs and assertions."""
    return read_udf_dirs("tests/assert", "udf", "udf_js")
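
# If udfs() is registered as a pytest fixture (no decorator is shown above),
# a test can consume the parsed UDFs directly; a hypothetical usage:
def test_udfs_parsed(udfs):
    assert len(udfs) > 0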