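These bindings share a common preamble that is not shown in the snippets below. A plausible set of imports, with the helper locations assumed from the pyspark3d and PySpark package layouts:

# Assumed preamble (module paths are assumptions, not shown in the original snippets).
from typing import Dict

import numpy as np
from py4j.java_gateway import JavaObject
from pyspark.sql import DataFrame
from pyspark.mllib.common import _java2py

# Internal pyspark3d helpers used throughout: load a JVM class by name and
# retrieve the active SparkContext.
from pyspark3d import load_from_jvm, get_spark_context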
def knn(df: DataFrame, p: list, k: int, coordSys: str, unique: bool):
    """ Finds the K nearest neighbors of the query object.

    The naive implementation here searches through all the objects in
    the DataFrame to get the KNN. The nearness of the objects here
    is decided on the basis of the distance between their centers.

    Parameters
    ----------
    df : DataFrame
        Input DataFrame. Must have 3 columns corresponding to the
        coordinates (x, y, z) if cartesian or (r, theta, phi) if spherical.
    p : list of float
        Targeted point for which we want neighbors.
    k : int
        Number of neighbours to find.
    coordSys : str
        Coordinate system: spherical or cartesian
    unique : bool
        If True, return only distinct objects. Default is False.

    Returns
    --------
    out : DataFrame
        DataFrame with the coordinates of the k neighbours found.

    Examples
    --------
    >>> df = spark.read.format("fits")\
        .option("hdu", 1)\
        .load("../src/test/resources/cartesian_points.fits")

    Get the 100 closest neighbours around the point [0.2, 0.2, 0.2]
    >>> K = 100
    >>> target = [0.2, 0.2, 0.2]
    >>> unique = False
    >>> neighbours = knn(df.select("x", "y", "z"), target, K, "spherical", unique)

    >>> print(neighbours.count())
    100

    You can add back the metadata
    >>> neighboursWithMeta = df.join(neighbours, ["x", "y", "z"], "left_semi")
    """
    prefix = "com.astrolabsoftware.spark3d"
    scalapath = "{}.Queries.KNN".format(prefix)
    scalaclass = load_from_jvm(scalapath)

    # To convert a Python list to a Scala List
    convpath = "{}.python.PythonClassTag.javaListtoscalaList".format(prefix)
    conv = load_from_jvm(convpath)

    out = _java2py(get_spark_context(),
                   scalaclass(df._jdf, conv(p), k, coordSys, unique))

    return out
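For intuition, here is a pure-PySpark sketch of the same naive search in the cartesian case (illustration only, not part of pyspark3d): rank every row by its distance to the target point and keep the k smallest.

# Hedged sketch: naive cartesian KNN done directly in PySpark.
from pyspark.sql import functions as F

def knn_cartesian_py(df, p, k):
    # Euclidean distance between each row's center and the target point p.
    dist = F.sqrt(
        (F.col("x") - p[0])**2 + (F.col("y") - p[1])**2 + (F.col("z") - p[2])**2)
    # Sort by distance and keep the k closest rows.
    return df.withColumn("__dist", dist).orderBy("__dist").limit(k).drop("__dist")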
def checkLoadBalancing(df: DataFrame,
                       kind: str = "frac",
                       numberOfElements: int = -1):
    """
    DataFrame containing the weight of each partition.
    You can choose between outputing the size (number of rows) of each partition
    or the fractional size (%) to the total number of rows.
    size of the dataset (in percent). This is useful to check whether the
    load is correctly balanced.

    Parameters
    ----------
    df : DataFrame
        Input DataFrame
    kind : str
        print the load balancing in terms of fractional size (kind="frac")
        or number of rows per partition (kind="size"). Default is "frac".
    numberOfElements : int
        (optional). Total number of elements in the DataFrame.
        Only needed if you choose to output fractional sizes (kind="frac").
        If not provided (i.e. default value of -1) and kind="frac",
        it will be computed (count).

    Returns
    ----------
    dfout : DataFrame
        DataFrame containing the weight of each partition.

    Examples
    ----------
    Load data
    >>> df = spark.read.format("fits")\
        .option("hdu", 1)\
        .load("../src/test/resources/astro_obs.fits")

    Fake repartitioning in 10 equal sized partitions
    >>> df = df.repartition(10)

    Compute the load balancing %
    >>> df_load = checkLoadBalancing(df, kind="frac")

    Note that this is a DataFrame, so you can use df.show()
    Here we will check that the total is indeed 100%
    >>> val = df_load.select("Load (%)").collect()
    >>> assert(int(sum([i[0] for i in val])) == 100)

    Same using number of rows instead of fractional contribution
    >>> df_load = checkLoadBalancing(df, kind="size")
    >>> val = df_load.select("Load (#Rows)").collect()

    >>> assert(int(sum([i[0] for i in val])) == df.count())
    """
    prefix = "com.astrolabsoftware.spark3d"
    scalapath = "{}.Checkers.checkLoadBalancing".format(prefix)
    scalaclass = load_from_jvm(scalapath)

    dfout = _java2py(get_spark_context(),
                     scalaclass(df._jdf, kind, numberOfElements))

    return dfout
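As an illustration of what the Scala routine computes, here is a pure-PySpark sketch (not part of pyspark3d) that builds the same kind of per-partition weight DataFrame:

# Hedged sketch: per-partition row counts, either absolute or as a percentage.
from pyspark.sql import SparkSession

def check_load_balancing_py(df, kind="frac"):
    spark = SparkSession.builder.getOrCreate()
    # One element per partition: its number of rows.
    sizes = df.rdd.mapPartitions(lambda it: [sum(1 for _ in it)]).collect()
    if kind == "size":
        rows = [(i, s) for i, s in enumerate(sizes)]
        return spark.createDataFrame(rows, ["partition_id", "Load (#Rows)"])
    total = sum(sizes)
    rows = [(i, 100.0 * s / total) for i, s in enumerate(sizes)]
    return spark.createDataFrame(rows, ["partition_id", "Load (%)"])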
def ShellEnvelope(*args) -> JavaObject:
    """
    Binding around ShellEnvelope.scala. For full description, see
    `$spark3d/src/main/scala/com/spark3d/geometryObjects/ShellEnvelope.scala`

    The Scala version makes use of several constructors (i.e. with different
    kinds of argument). In order to mimic this within a single routine, we
    abstract the arguments of the constructor using the iterable `*args`.
    There are then 5 possibilities to instantiate a `ShellEnvelope`:

    Case 1: Defined with a center coordinates, inner and outer radius.
        args = [x: Double, y: Double, z: Double,
            isSpherical: Boolean, innerRadius: Double, outerRadius: Double]
    Case 2: Defined with a center coordinates, and a radius (= a sphere).
        args = [x: Double, y: Double, z: Double,
            isSpherical: Boolean, radius: Double]
    Case 3: Defined with a Point3D, and a radius (= a sphere).
        args = [p: Point3D(...), radius: Double]
    Case 4: from another ShellEnvelope
        args = [shell: ShellEnvelope(...)]
    Case 5: Null envelope
        args = []

    Returns
    ----------
    shell : ShellEnvelope instance
        An instance of the class ShellEnvelope. An error is thrown if
        the arguments of the constructor are not understood.

    Example
    ----------
    >>> from pyspark3d.geometryObjects import Point3D

    Case 1: Defined with a center coordinates (cart), inner and outer radius.
    >>> shell_case1 = ShellEnvelope(0.0, 1.0, 1.0, False, 0.5, 1.0)
    >>> assert("ShellEnvelope" in shell_case1.__str__())

    Case 2: Defined with a center coordinates, and a radius (= a sphere).
    >>> shell_case2 = ShellEnvelope(0.0, 0.0, 0.0, False, 1.0)
    >>> print(round(shell_case2.getArea(), 1))
    12.6

    Case 3: Defined with a Point3D, and a radius (= a sphere).
    >>> origin = Point3D(0.0, 0.0, 0.0, False)
    >>> shell_case3 = ShellEnvelope(origin, 1.0)
    >>> print(shell_case3.intersects(origin))
    True

    Case 4: From another ShellEnvelope
    >>> shell_case4 = ShellEnvelope(shell_case3)
    >>> print(shell_case4.isEqual(shell_case3))
    True

    Case 5: The null shell
    >>> shell_case5 = ShellEnvelope()
    >>> print(shell_case5.isNull())
    True

    To see all the available methods:
    >>> print(sorted(shell_case1.__dir__())) # doctest: +NORMALIZE_WHITESPACE
    ['center', 'contains', 'equals', 'expandBy', 'expandInnerRadius',
    'expandOuterRadius', 'expandToInclude', 'getArea', 'getClass',
    'getEnvelope', 'getHash', 'hasCenterCloseTo', 'hashCode', 'innerRadius',
    'innerRadius_$eq', 'intersects', 'intersectsShell', 'isEqual', 'isNull',
    'isPointInShell', 'notify', 'notifyAll', 'outerRadius', 'outerRadius_$eq',
    'setToNull', 'toHealpix', 'toHealpix$default$2', 'toString', 'wait']

    """
    warning = """
        There are 5 possibilities to instantiate a `ShellEnvelope`:

        Case 1: Defined with a center coordinates, inner and outer radius.
            args = [x: Double, y: Double, z: Double,
                isSpherical: Boolean, innerRadius: Double, outerRadius: Double]
        Case 2: Defined with a center coordinates, and a radius (= a sphere).
            args = [x: Double, y: Double, z: Double,
                isSpherical: Boolean, radius: Double]
        Case 3: Defined with a Point3D, and a radius (= a sphere).
            args = [p: Point3D(...), radius: Double]
        Case 4: from another ShellEnvelope
            args = [shell: ShellEnvelope(...)]
        Case 5: Null envelope
            args = []
    """
    scalapath = "com.astrolabsoftware.spark3d.geometryObjects.ShellEnvelope"
    shell = load_from_jvm(scalapath)

    # Case 5
    if len(args) == 0:
        return shell()

    # Case 4
    elif len(args) == 1:
        cond_shell = "ShellEnvelope" in args[0].__str__()

        msg = """
        You are trying to instantiate a ShellEnvelope with 1 argument which is
        not a ShellEnvelope.

        {}
        """.format(warning)

        assert (cond_shell), msg

        return shell(args[0])

    # Case 3
    elif len(args) == 2:
        msg = """
        You are trying to instantiate a ShellEnvelope with 2 arguments
        which are not a Point3D (center) and a float (radius).

        {}
        """.format(warning)

        assert ("Point3D" in args[0].__str__()), msg
        assert (type(args[1]) is float or type(args[1]) is int), msg

        return shell(args[0], args[1])

    # Case 2
    elif len(args) == 5:
        msg = """
        You are trying to instantiate a ShellEnvelope with 5 arguments
        but there is one or several type mismatch.

        {}
        """.format(warning)

        assert (type(args[0]) == int or type(args[0]) == float), msg
        assert (type(args[1]) == int or type(args[1]) == float), msg
        assert (type(args[2]) == int or type(args[2]) == float), msg
        assert (type(args[3]) == bool), msg
        assert (type(args[4]) == int or type(args[4]) == float), msg

        return shell(args[0], args[1], args[2], args[3], args[4])
    # Case 1
    elif len(args) == 6:
        msg = """
        You are trying to instantiate a ShellEnvelope with 6 arguments
        but there is one or several type mismatch.

        {}
        """.format(warning)

        assert (type(args[0]) == int or type(args[0]) == float), msg
        assert (type(args[1]) == int or type(args[1]) == float), msg
        assert (type(args[2]) == int or type(args[2]) == float), msg
        assert (type(args[3]) == bool), msg
        assert (type(args[4]) == int or type(args[4]) == float), msg
        assert (type(args[5]) == int or type(args[5]) == float), msg

        return shell(args[0], args[1], args[2], args[3], args[4], args[5])
    else:
        msg = """
        Constructor not understood.

        {}
        """.format(warning)
        assert (False), msg
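The geometry wrapped here is simple to state in plain Python; a minimal sketch (illustration only, not the spark3d implementation) of the membership test behind `isPointInShell`:

# Hedged sketch: a point lies in the shell if its distance to the center
# falls within [innerRadius, outerRadius].
import math

def point_in_shell(center, inner_radius, outer_radius, p):
    d = math.dist(center, p)  # Euclidean distance, Python 3.8+
    return inner_radius <= d <= outer_radius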
def BoxEnvelope(*args) -> JavaObject:
    """
    Binding around BoxEnvelope.scala. For full description,
    see `$spark3d/src/main/scala/com/spark3d/geometryObjects/BoxEnvelope.scala`

    The Scala version makes use of several constructors (i.e. with different
    kinds of argument). In order to mimic this within a single routine, we
    abstract the arguments of the constructor using the iterable `*args`.
    There are then 5 possibilities to instantiate a `BoxEnvelope`:

    Case 1: from coordinates
        args = [x1: float, x2: float, y1: float, y2: float,
            z1: float, z2: float]
    Case 2: from a single Point3D (i.e. the box is a Point3D)
        args = [p: Point3D(...)]
    Case 3: from three Point3D
        args = [p1: Point3D(...), p2: Point3D(...), p3: Point3D(...)]
    Case 4: from another BoxEnvelope
        args = [b: BoxEnvelope(...)]
    Case 5: Null envelope
        args = []

    Coordinates of input Point3D MUST be cartesian.

    Returns
    ----------
    box : BoxEnvelope instance
        An instance of the class BoxEnvelope. An error is thrown if the
        arguments of the constructor are not understood.

    Example
    ----------
    >>> from pyspark3d.geometryObjects import Point3D

    Case 1: Cube from coordinates
    >>> box_case1 = BoxEnvelope(0.0, 1.0, 0.0, 1.0, 0.0, 1.0)
    >>> print(box_case1.__str__())
    Env[0.0 : 1.0, 0.0 : 1.0, 0.0 : 1.0, ]

    Case 2: Zero volume
    >>> p3d = Point3D(0.0, 0.0, 0.0, False)
    >>> box_case2 = BoxEnvelope(p3d)
    >>> print(box_case2.getVolume())
    0.0

    Case 3: Cube from 3 Point3D
    >>> p3d_1 = Point3D(0.0, 1.0, 0.0, False)
    >>> p3d_2 = Point3D(0.1, 1.0, 0.0, False)
    >>> p3d_3 = Point3D(1.0, -1.0, 1.0, False)
    >>> origin = Point3D(0.0, 0.0, 0.0, False)
    >>> box_case3 = BoxEnvelope(p3d_1, p3d_2, p3d_3)
    >>> print(box_case3.contains(origin))
    True

    Case 4: From another envelope
    >>> box_case4 = BoxEnvelope(box_case3)
    >>> print(box_case4.isEqual(box_case3))
    True

    Case 5: The null cube
    >>> box_case5 = BoxEnvelope()
    >>> print(box_case5.isNull())
    True

    To see all the available methods:
    >>> print(sorted(box_case1.__dir__())) # doctest: +NORMALIZE_WHITESPACE
    ['apply', 'center', 'contains', 'covers', 'distance', 'equals', 'expandBy',
    'expandOutwards', 'expandToInclude', 'getClass', 'getEnvelope', 'getHash',
    'getVolume', 'getXLength', 'getYLength', 'getZLength', 'hasCenterCloseTo',
    'hashCode', 'indexID', 'indexID_$eq', 'intersection', 'intersects',
    'intersectsBox', 'intersectsRegion', 'isEqual', 'isNull', 'maxExtent',
    'maxX', 'maxX_$eq', 'maxY', 'maxY_$eq', 'maxZ', 'maxZ_$eq', 'minExtent',
    'minX', 'minX_$eq', 'minY', 'minY_$eq', 'minZ', 'minZ_$eq', 'notify',
    'notifyAll', 'setToNull', 'toHealpix', 'toHealpix$default$2', 'toString',
    'translate', 'wait']

    """
    warning = """
        There are 5 possibilities to instantiate a `BoxEnvelope`:

        Case 1: from coordinates
            args = [x1: float, x2: float, y1: float, y2: float,
                z1: float, z2: float]
        Case 2: from a single Point3D (i.e. the box is a Point3D)
            args = [p: Point3D(...)]
        Case 3: from three Point3D
            args = [p1: Point3D(...), p2: Point3D(...), p3: Point3D(...)]
        Case 4: from another BoxEnvelope
            args = [b: BoxEnvelope(...)]
        Case 5: Null envelope
            args = []
    """
    scalapath = "com.astrolabsoftware.spark3d.geometryObjects.BoxEnvelope"
    box = load_from_jvm(scalapath)

    # Case 5
    if len(args) == 0:
        return box()

    # Case 2 or 4
    elif len(args) == 1:
        cond_p3d = "Point3D" in args[0].__str__()
        cond_box = "Env" in args[0].__str__()

        msg = """
        You are trying to instantiate a BoxEnvelope with 1 argument which is
        neither a Point3D nor a BoxEnvelope.

        {}
        """.format(warning)

        assert (cond_p3d or cond_box), msg

        return box(args[0])

    # Case 3
    elif len(args) == 3:
        msg = """
        You are trying to instantiate a BoxEnvelope with 3 arguments and
        at least one is not a Point3D.

        {}
        """.format(warning)

        for arg in args:
            assert ("Point3D" in arg.__str__()), msg

        return box(args[0], args[1], args[2])

    # Case 1
    elif len(args) == 6:
        msg = """
        You are trying to instantiate a BoxEnvelope with 6 arguments
        and at least one is not a number (int or float).

        {}
        """.format(warning)

        for arg in args:
            assert (type(arg) == int or type(arg) == float), msg

        return box(args[0], args[1], args[2], args[3], args[4], args[5])
    else:
        msg = """
        Constructor not understood.

        {}
        """.format(warning)
        assert (False), msg
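Similarly, a minimal pure-Python sketch (illustration only) of the axis-aligned containment test performed by a BoxEnvelope built from coordinate ranges:

# Hedged sketch: a point is inside the box if each coordinate lies within
# the corresponding [min, max] range.
def box_contains(min_xyz, max_xyz, p):
    return all(lo <= c <= hi for lo, c, hi in zip(min_xyz, p, max_xyz))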
def Point3D(x: float, y: float, z: float, isSpherical: bool) -> JavaObject:
    """
    Binding around Point3D.scala. For full description,
    see `$spark3d/src/main/scala/com/spark3d/geometryObjects/Point3D.scala`.

    By default, the input coordinates are assumed to be Euclidean,
    that is (x, y, z). The user can also work with spherical input coordinates
    (x=r, y=theta, z=phi) by setting the argument isSpherical=True.

    Parameters
    ----------
    x : float
        Input X coordinate in Euclidean space, and R in spherical space.
    y : float
        Input Y coordinate in Euclidean space, and THETA in spherical space.
    z : float
        Input Z coordinate in Euclidean space, and PHI in spherical space.
    isSpherical : bool
        If true, it assumes that the coordinates of the Point3D
        are (r, theta, phi). Otherwise, it assumes cartesian
        coordinates (x, y, z).

    Returns
    ----------
    p3d : Point3D instance
        An instance of the class Point3D.

    Example
    ----------
    Instantiate a point with spherical coordinates (r, theta, phi)
    >>> p3d = Point3D(1.0, np.pi, 0.0, True)

    The returned type is JavaObject (Point3D instance)
    >>> print(type(p3d))
    <class 'py4j.java_gateway.JavaObject'>

    You can then call the method associated, for example
    >>> p3d.getVolume()
    0.0

    Return the point coordinates
    >>> p3d = Point3D(1.0, 1.0, 0.0, False)
    >>> p3d.getCoordinatePython()
    [1.0, 1.0, 0.0]

    It will be a JavaList by default
    >>> coord = p3d.getCoordinatePython()
    >>> print(type(coord))
    <class 'py4j.java_collections.JavaList'>

    Make it a python list
    >>> coord_python = list(coord)
    >>> print(type(coord_python))
    <class 'list'>

    [Astro] Convert the (theta, phi) in Healpix pixel index:
    >>> p3d = Point3D(1.0, np.pi, 0.0, True) # (r, theta, phi)
    >>> p3d.toHealpix(2048, True)
    50331644

    To see all the available methods:
    >>> print(sorted(p3d.__dir__())) # doctest: +NORMALIZE_WHITESPACE
    ['center', 'distanceTo', 'equals', 'getClass', 'getCoordinate',
    'getCoordinatePython', 'getEnvelope', 'getHash', 'getVolume',
    'hasCenterCloseTo', 'hashCode', 'intersects', 'isEqual', 'isSpherical',
    'notify', 'notifyAll', 'toHealpix', 'toHealpix$default$2', 'toString',
    'wait', 'x', 'y', 'z']
    """
    scalapath = "com.astrolabsoftware.spark3d.geometryObjects.Point3D"
    p3d = load_from_jvm(scalapath)

    return p3d(x, y, z, isSpherical)
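For reference, one common spherical-to-cartesian mapping that such a point would follow when isSpherical=True, with theta taken as the polar angle (this convention is an assumption; the exact one used by spark3d should be checked against the Scala source):

# Hedged sketch of one standard convention, not necessarily the spark3d one.
import numpy as np

def spherical_to_cartesian(r, theta, phi):
    x = r * np.sin(theta) * np.cos(phi)
    y = r * np.sin(theta) * np.sin(phi)
    z = r * np.cos(theta)
    return x, y, z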
def windowquery(df: DataFrame, windowtype: str, windowcoord: int,
                coordSys: str):
    """ Perform window query, that is match between DF elements and
    a user-defined window (point, sphere, shell, box).

    If windowtype =
        - point: windowcoord = [x, y, z]
        - sphere: windowcoord = [x, y, z, R]
        - shell: windowcoord = [x, y, z, Rin, Rout]
        - box: windowcoord = [x1, y1, z1, x2, y2, z2, x3, y3, z3]
    Use [x, y, z] for cartesian or [r, theta, phi] for spherical.
    Note that box only accepts cartesian coordinates.

    Parameters
    ----------
    df : DataFrame
        Input DataFrame. Must have 3 columns corresponding to the
        coordinates (x, y, z) if cartesian or (r, theta, phi) if spherical.
    windowtype : str
        point, shell, sphere, or box.
    windowcoord : list of float
        Coordinates of the window (see the description above).
    coordSys : str
        Coordinate system: spherical or cartesian

    Returns
    --------
    out : DataFrame
        DataFrame with the coordinates of the objects found in the window

    Examples
    --------
    >>> df = spark.read.format("csv")\
        .option("inferSchema", True)\
        .option("header", True)\
        .load("../src/test/resources/cartesian_spheres_manual.csv")

    Point-like window
    >>> windowtype = "point"
    >>> windowcoord = [1.0, 1.0, 1.0]
    >>> env = windowquery(df.select("x", "y", "z"), windowtype, windowcoord, "cartesian")

    >>> print(env.count())
    2

    You can add back the metadata
    >>> envWithMeta = df.join(env, ["x", "y", "z"], "left_semi")

    Sphere-like window
    >>> windowtype = "sphere"
    >>> windowcoord = [1.0, 1.0, 1.0, 2.0]
    >>> env = windowquery(df.select("x", "y", "z"), windowtype, windowcoord, "cartesian")

    >>> print(env.count())
    3

    Shell-like window
    >>> windowtype = "shell"
    >>> windowcoord = [1.0, 1.0, 1.0, 0.0, 2.0]
    >>> env = windowquery(df.select("x", "y", "z"), windowtype, windowcoord, "cartesian")

    >>> print(env.count())
    3

    Box-like window
    >>> windowtype = "box"
    >>> windowcoord = [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0]
    >>> env = windowquery(df.select("x", "y", "z"), windowtype, windowcoord, "cartesian")

    >>> print(env.count())
    2
    """
    prefix = "com.astrolabsoftware.spark3d"
    scalapath = "{}.Queries.windowQuery".format(prefix)
    scalaclass = load_from_jvm(scalapath)

    # To convert a Python list to a Scala List
    convpath = "{}.python.PythonClassTag.javaListtoscalaList".format(prefix)
    conv = load_from_jvm(convpath)

    out = _java2py(
        get_spark_context(),
        scalaclass(df._jdf, windowtype, conv(windowcoord), coordSys))

    return out
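For intuition, the sphere-like window in the cartesian case amounts to a distance filter; a pure-PySpark sketch (illustration only, not part of pyspark3d):

# Hedged sketch: keep rows whose center lies inside the sphere of radius R
# centered on (x0, y0, z0).
from pyspark.sql import functions as F

def sphere_window_py(df, x0, y0, z0, R):
    dist = F.sqrt(
        (F.col("x") - x0)**2 + (F.col("y") - y0)**2 + (F.col("z") - z0)**2)
    return df.filter(dist <= R)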
def repartitionByCol(df: DataFrame,
                     colname: str,
                     preLabeled: bool,
                     numPartitions: int = -1):
    """Repartition a DataFrame according to a column containing partition ID.

    Note that this does not re-order elements, but builds new partitions
    grouping objects that share the same partition ID, as defined by one of
    the DataFrame columns (i.e. a shuffle is triggered).

    Parameters
    ----------
    df : DataFrame
        Input DataFrame.
    colname : str
        Column name describing the repartitioning. Typically Ints.
    preLabeled : bool
        True means the column containing the partition ID already contains
        numbers from 0 to `numPartitions - 1`; False otherwise.
        Note that in the latter case, the execution time will be longer as
        we need to map column values to partition IDs.
    numPartitions : int
        (optional) Number of partitions. If not provided, the code will
        guess the number of partitions by counting the number of distinct
        values of the repartitioning column.
        As this can be costly, you can provide this information manually.

    Returns
    ---------
    dfout : DataFrame
        Repartitioned input DataFrame.

    Examples
    ---------
    Load data
    >>> df = spark.read.format("fits")\
        .option("hdu", 1)\
        .load("../src/test/resources/astro_obs.fits")

    Specify options
    >>> options = {
    ...     "geometry": "points",
    ...     "colnames": "Z_COSMO,RA,DEC",
    ...     "coordSys": "spherical",
    ...     "gridtype": "onion"}

    Add a column containing the partitioning (Onion)
    >>> df_colid = prePartition(df, options, 10)
    >>> print(df_colid.select("partition_id").distinct().count())
    10

    Trigger the repartitioning
    >>> df_repart = repartitionByCol(df_colid, "partition_id", True, 10)
    >>> def mapLen(part): yield len([*part])
    >>> df_repart.rdd.mapPartitions(mapLen).take(1)[0]
    2104
    """
    prefix = "com.astrolabsoftware.spark3d"
    scalapath = "{}.Repartitioning.repartitionByCol".format(prefix)
    scalaclass = load_from_jvm(scalapath)

    dfout = _java2py(get_spark_context(),
                     scalaclass(df._jdf, colname, preLabeled, numPartitions))

    return dfout
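A rough pure-PySpark equivalent (illustration only, assuming a pre-labeled column with IDs 0..numPartitions-1) that shows the shuffle this routine triggers:

# Hedged sketch: send every row to the partition given by its partition-ID column.
from pyspark.sql import SparkSession

def repartition_by_col_py(df, colname, num_partitions):
    spark = SparkSession.builder.getOrCreate()
    rdd = (df.rdd
             .keyBy(lambda row: row[colname])
             .partitionBy(num_partitions,
                          partitionFunc=lambda key: int(key) % num_partitions)
             .values())
    return spark.createDataFrame(rdd, df.schema)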
def prePartition(df: DataFrame,
                 options: Dict = {"": ""},
                 numPartitions: int = -1):
    """Add a DataFrame column describing the partitioning.

    This method allows the use of a custom partitioner (SpatialPartitioner).
    Note that no data movement (shuffle) is performed yet here, as we just
    describe how the repartitioning should be done.
    Use `partitionBy` to trigger it.

    `options` must contain four entries:
       - gridtype: the type of repartitioning. Available: current (no repartitioning), onion, octree.
       - geometry: geometry of objects: points, spheres, or boxes
       - coordSys: coordinate system: spherical or cartesian
       - colnames: comma-separated names of the spatial coordinates. For points,
            must be "x,y,z" or "r,theta,phi". For spheres, must be "x,y,z,R" or
            "r,theta,phi,R".

    Parameters
    ----------
    df : DataFrame
        Input DataFrame
    options : Dictionary of Strings
        Dictionary containing metadata (see above).
    numPartitions : int
        (optional) The number of partitions wanted. -1 by default,
        i.e. the number of partitions of the input DataFrame.

    Returns
    ----------
    dfout : DataFrame
        Input DataFrame plus an additional column `partition_id`.

    Examples
    ----------
    Load data
    >>> df = spark.read.format("fits")\
        .option("hdu", 1)\
        .load("../src/test/resources/astro_obs.fits")

    Specify options
    >>> options = {
    ...     "geometry": "points",
    ...     "colnames": "Z_COSMO,RA,DEC",
    ...     "coordSys": "spherical",
    ...     "gridtype": "onion"}

    Add a column containing the partitioning (Onion)
    >>> df_colid = prePartition(df, options, 10)
    >>> print(df_colid.select("partition_id").distinct().count())
    10

    Note that you can also return the current partitioning:
    >>> options = {
    ...     "geometry": "points",
    ...     "colnames": "Z_COSMO,RA,DEC",
    ...     "coordSys": "spherical",
    ...     "gridtype": "current"}
    >>> df_colid = prePartition(df, options)
    >>> assert(df_colid.select("partition_id").distinct().count() == df.rdd.getNumPartitions())
    """
    prefix = "com.astrolabsoftware.spark3d"
    scalapath = "{}.Repartitioning.prePartition".format(prefix)
    scalaclass = load_from_jvm(scalapath)

    # To convert a Python dict to a Scala Map
    convpath = "{}.python.PythonClassTag.javaHashMaptoscalaMap".format(prefix)
    conv = load_from_jvm(convpath)

    dfout = _java2py(get_spark_context(),
                     scalaclass(df._jdf, conv(options), numPartitions))

    return dfout
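Putting the pieces together, an end-to-end sketch (parameters taken from the docstring examples above; `df` is assumed to be loaded as shown there): describe the partitioning, trigger the shuffle, then inspect the balance.

options = {
    "geometry": "points",
    "colnames": "Z_COSMO,RA,DEC",
    "coordSys": "spherical",
    "gridtype": "onion"}
df_colid = prePartition(df, options, 10)
df_repart = repartitionByCol(df_colid, "partition_id", True, 10)
checkLoadBalancing(df_repart, kind="frac").show()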