Exemple #1
0
def test_parquet(gp_point, gp_multipoint, gp_multiline, tmp_path):
    """Round-trip a GeoDataFrame through parquet and check it is unchanged.

    The frame is built from three geometry columns plus an integer column,
    written with ``to_parquet`` and read back with ``read_parquet``.

    Args:
        gp_point: Sized, sliceable collection of point geometries.
        gp_multipoint: Sized, sliceable collection of multipoint geometries.
        gp_multiline: Sized, sliceable collection of multiline geometries.
        tmp_path: Directory in which the parquet file is written.
    """
    # Build dataframe
    n = min(len(gp_multipoint), len(gp_multiline))
    df = GeoDataFrame({
        'point': GeoSeries(gp_point[:n]),
        'multipoint': GeoSeries(gp_multipoint[:n]),
        'multiline': GeoSeries(gp_multiline[:n]),
        'a': list(range(n))
    })

    path = tmp_path / 'df.parq'
    to_parquet(df, path)
    df_read = read_parquet(path)
    assert isinstance(df_read, GeoDataFrame)
    # Do not use ``all(df == df_read)``: iterating a DataFrame yields its
    # column labels, which are non-empty strings here, so that check passes
    # regardless of the data. Compare the frames' contents instead.
    pd.testing.assert_frame_equal(df, df_read)
def test_parquet_columns(gp_point, gp_multipoint, gp_multiline, tmp_path):
    """Check that reading a subset of columns returns only those columns.

    A frame with three geometry columns and one integer column is written
    to parquet; it is then read back requesting only ``'a'`` and
    ``'multiline'`` and compared against the same selection of the original.
    """
    # Row count is bounded by the shorter of the two multi-geometry inputs.
    row_count = min(len(gp_multipoint), len(gp_multiline))
    data = {
        'point': GeoSeries(gp_point[:row_count]),
        'multipoint': GeoSeries(gp_multipoint[:row_count]),
        'multiline': GeoSeries(gp_multiline[:row_count]),
        'a': list(range(row_count)),
    }
    frame = GeoDataFrame(data)

    parquet_file = tmp_path / 'df.parq'
    to_parquet(frame, parquet_file)

    # Request the columns in a different order than they were written.
    subset = ['a', 'multiline']
    loaded = read_parquet(str(parquet_file), columns=subset)

    assert isinstance(loaded, GeoDataFrame)
    pd.testing.assert_frame_equal(frame[subset], loaded)
def test_parquet(gp_point, gp_multipoint, gp_multiline, tmp_path):
    """Round-trip a GeoDataFrame with a named index through parquet.

    The frame's index is named ``'range_idx'`` before writing; after reading
    all four columns back, both the contents and the index name must match.
    """
    # Row count is bounded by the shorter of the two multi-geometry inputs.
    row_count = min(len(gp_multipoint), len(gp_multiline))
    data = {
        'point': GeoSeries(gp_point[:row_count]),
        'multipoint': GeoSeries(gp_multipoint[:row_count]),
        'multiline': GeoSeries(gp_multiline[:row_count]),
        'a': list(range(row_count)),
    }
    frame = GeoDataFrame(data)
    frame.index.name = 'range_idx'

    parquet_file = tmp_path / 'df.parq'
    to_parquet(frame, parquet_file)

    # Every data column is requested explicitly; the index is not listed.
    requested = ['point', 'multipoint', 'multiline', 'a']
    loaded = read_parquet(str(parquet_file), columns=requested)

    assert isinstance(loaded, GeoDataFrame)
    pd.testing.assert_frame_equal(frame, loaded)
    assert loaded.index.name == frame.index.name
Exemple #4
0
def split_device_trajectories(
    file: str,
    output: str,
    study: gpd.GeoDataFrame,
    split_opts: Dict[str, Any],
    min_length: float,
    min_duration: float,
) -> None:
    """Split one device file into trajectories and write them to parquet.

    The result is written to ``{output}/device_<part>.parquet``, where
    ``<part>`` is the second-to-last dot-separated piece of ``file``. If
    that file already exists, or the input has fewer than two rows, nothing
    is written.

    Args:
        file: Path of the input parquet file.
        output: Directory prefix for the output file.
        study: Frame that the trajectories are spatially joined against.
        split_opts: Keyword arguments forwarded to ``split_trajectories``.
        min_length: Lower bound (inclusive) on the ``"length"`` column.
        min_duration: Lower bound (inclusive) on the ``"duration"`` column.
    """
    part = file.split('.')[-2]
    target = f"{output}/device_{part}.parquet"
    if exists(target):
        # Already produced by an earlier run; skip the work.
        return

    wanted = ["device_id", "latitude", "longitude", "timestamp"]
    points = read_parquet(file)[wanted]
    if len(points) < 2:
        return

    trajectories = extract_traj_info(split_trajectories(points, **split_opts))

    # Keep only trajectories that meet both thresholds.
    long_enough = trajectories["length"] >= min_length
    lasts_enough = trajectories["duration"] >= min_duration
    kept = trajectories[long_enough & lasts_enough]

    # The join adds an "index_right" column, which is not wanted downstream.
    joined = gpd.sjoin(kept, study).drop(columns=["index_right"])
    to_parquet(spd.GeoDataFrame(joined), target)