Example #1
def codegen_from_yaml(
    spark: SparkSession,
    uri: str,
    name: Optional[str] = None,
    options: Optional[Dict[str, str]] = None,
) -> str:
    """Generate code from a YAML file.

    Parameters
    ----------
    spark : SparkSession
        A live Spark session.
    uri : str
        The model spec URI.
    name : str, optional
        The name of the model.
    options : dict, optional
        Options passed to the model.

    Returns
    -------
    str
        The name of the registered Spark UDF.
    """
    with open_uri(uri) as fobj:
        spec = FileModelSpec(fobj, options=options)
    udf = udf_from_spec(spec)
    return register_udf(spark, udf, name)
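
A minimal usage sketch (not part of the example above); the spec URI, the model name, and the images table are hypothetical, and a live SparkSession named spark is assumed:

    func_name = codegen_from_yaml(
        spark,
        "s3://bucket/models/resnet50.yml",  # hypothetical spec URI
        name="resnet50",
    )
    # The returned name can be used directly in Spark SQL.
    spark.sql(f"SELECT {func_name}(image) AS prediction FROM images")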
Example #2
File: fs.py Project: eto-ai/rikai
 def __init__(
     self,
     spec_uri: Union[str, Path],
     options: Optional[Dict[str, Any]] = None,
     validate: bool = True,
 ):
     with open_uri(spec_uri) as fobj:
         spec = yaml.load(fobj, Loader=yaml.FullLoader)
     self.base_dir = os.path.dirname(spec_uri)
     spec.setdefault("options", {})
     if options:
         spec["options"].update(options)
     super().__init__(spec, validate=validate)
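
A hedged construction sketch; the spec path and the batch_size option are hypothetical. Note that options passed to the constructor override values loaded from the YAML file:

    # Hypothetical usage of the constructor above.
    spec = FileModelSpec(
        "models/resnet50.yml",
        options={"batch_size": 16},
    )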
Example #3
def codegen_from_yaml(
    spark: SparkSession,
    uri: str,
    name: Optional[str] = None,
    options: Optional[Dict[str, str]] = None,
) -> str:
    """Generate code from a YAML file.

    Parameters
    ----------
    spark : SparkSession
        A live Spark session.
    uri : str
        The model spec URI.
    name : str, optional
        The name of the model.
    options : dict, optional
        Options passed to the model.

    Returns
    -------
    str
        The name of the registered Spark UDF.
    """
    with open_uri(uri) as fobj:
        spec = ModelSpec(fobj, options=options)

    if spec.version != 1.0:
        raise SpecError(
            f"Only spec version 1.0 is supported, got {spec.version}"
        )

    if spec.flavor == "pytorch":
        from rikai.spark.sql.codegen.pytorch import generate_udf

        udf = generate_udf(
            spec.uri,
            spec.schema,
            spec.options,
            pre_processing=spec.pre_processing,
            post_processing=spec.post_processing,
        )
    else:
        raise SpecError(f"Unsupported model flavor: {spec.flavor}")

    func_name = f"{name}_{secrets.token_hex(4)}"
    spark.udf.register(func_name, udf)
    logger.info(f"Created model inference pandas_udf with name {func_name}")
    return func_name
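
For context, the YAML spec this function consumes might look like the sketch below. The keys simply mirror the attributes accessed above (version, flavor, uri, schema, options); every value is hypothetical:

    # Hypothetical spec file, not taken from the rikai docs.
    version: 1.0
    flavor: pytorch
    uri: s3://bucket/models/resnet50.pth
    schema: array<float>
    options:
      batch_size: 16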
Example #4
    def torch_inference_udf(
        iter: Iterator[pd.DataFrame],
    ) -> Iterator[pd.DataFrame]:
        # Load the model once per executor, then move it to the GPU
        # if one was requested.
        with open_uri(model_uri) as fobj:
            model = torch.load(fobj)
        device = torch.device("cuda" if use_gpu else "cpu")

        model.to(device)
        model.eval()

        with torch.no_grad():
            for series in iter:
                dataset = PandasDataset(series, transform=pre_processing)
                results = []
                for batch in DataLoader(
                        dataset,
                        batch_size=batch_size,
                        num_workers=num_workers,
                ):
                    predictions = model(batch)
                    if post_processing:
                        predictions = post_processing(predictions)
                    results.extend(predictions)
                yield pd.DataFrame(results)
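
The closure above is presumably wrapped into a Spark pandas UDF by its enclosing function; a minimal sketch of that wrapping, where the return schema is a placeholder assumption:

    from pyspark.sql.functions import pandas_udf

    # Sketch only: the real return type would come from the model spec schema.
    return pandas_udf(torch_inference_udf, returnType=schema)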
Example #5
 def open(self, mode="rb") -> BinaryIO:
     """Open the asset and returned as random-accessible file object."""
     return open_uri(self.uri, mode=mode)
Example #6
 def open(self, mode="rb") -> BinaryIO:
     """Open the asset and returned as random-accessible file object."""
     if self.is_embedded:
         return BytesIO(self.data)
     return open_uri(self.uri, mode=mode)
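
A hedged usage sketch for the accessor above; Image as an Asset subclass and the URI are assumptions:

    # Hypothetical: reads the same way whether the asset is embedded or remote.
    asset = Image("s3://bucket/images/cat.png")
    with asset.open() as fobj:
        data = fobj.read()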
Example #7
def test_open_https_uri():
    """Test support of https URI"""

    with open_uri(WIKIPEDIA) as fobj:
        assert len(fobj.read()) > 0
Example #8
def load_model_from_uri(uri: str):
    with open_uri(uri) as fobj:
        return torch.load(fobj)
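
A hedged usage sketch; the URI is hypothetical, and any scheme open_uri understands (including the https support tested in Example #7) should work:

    model = load_model_from_uri("s3://bucket/models/resnet50.pth")
    model.eval()  # ready for inference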