コード例 #1
0
def test_lcss():
    """Check the frame produced by ``generate_lcss`` for two trajectories.

    Two MoveDataFrames are built from the module-level ``list_data`` and
    ``list_data_2`` fixtures, ``generate_lcss`` is called on them with a
    third argument of 60, and the result is compared with a hand-written
    frame of four rows.
    """
    # Both frames use the same standard column names.
    column_names = {
        'latitude': LATITUDE,
        'longitude': LONGITUDE,
        'datetime': DATETIME,
        'traj_id': TRAJ_ID,
    }
    first_df = MoveDataFrame(data=list_data, **column_names)
    second_df = MoveDataFrame(data=list_data_2, **column_names)

    result = generate_lcss(first_df, second_df, 60)

    # Expected matches, one entry per row: timestamp in trajectory 1,
    # timestamp in trajectory 2 and the 'edge' value.
    times_a = [
        '2019-06-05 07:02:42',
        '2019-06-05 07:03:42',
        '2019-06-05 07:04:42',
        '2019-06-05 07:05:42',
    ]
    times_b = [
        '2019-06-05 07:02:40',
        '2019-06-05 07:03:40',
        '2019-06-05 07:04:40',
        '2019-06-05 07:05:40',
    ]
    edges = [
        [1938809894, 2527401909],
        [2527401903, 2527388956],
        [2527401895, 6502622934],
        [2862103272, 2862103258],
    ]
    rows = [
        [1, 2, Timestamp(time_a), Timestamp(time_b), 2, True, edge]
        for time_a, time_b, edge in zip(times_a, times_b, edges)
    ]

    expected = DataFrame(
        data=rows,
        columns=[
            'ida', 'idb', 'datetime_ida', 'datetime_idb', 'difference',
            'equals', 'edge',
        ],
        index=[0, 1, 2, 3],
    )

    assert_frame_equal(result, expected)
    assert len(result) == 4
コード例 #2
0
    def convert_to(
        self, new_type: Text
    ) -> Union['PandasMoveDataFrame', 'DaskMoveDataFrame']:
        """
        Return this object represented with the requested backend type.

        Parameters
        ----------
        new_type: 'pandas' or 'dask'
            The backend type to convert the object to.

        Returns
        -------
        A subclass of MoveDataFrameAbstractModel
            ``self`` when ``new_type`` is the dask type, or a new
            MoveDataFrame built from the computed data when it is the
            pandas type. Any other value yields ``None``.

        """
        # Already the requested type: nothing to convert.
        if new_type == TYPE_DASK:
            return self

        if new_type == TYPE_PANDAS:
            # compute() materialises the underlying data before it is
            # wrapped again (presumably as an in-memory pandas frame).
            return MoveDataFrame(
                self._data.compute(),
                latitude=LATITUDE,
                longitude=LONGITUDE,
                datetime=DATETIME,
                traj_id=TRAJ_ID,
                type_=TYPE_PANDAS
            )

        # Unrecognised type: same result as the implicit fall-through.
        return None
コード例 #3
0
def _default_move_df():
    """Return a small MoveDataFrame fixture with four points.

    Each row is passed as [latitude, longitude, datetime string, id];
    the first three rows use id 1 and the last one id 2.
    """
    rows = [
        [39.984094, 116.319236, '2008-10-23 05:53:05', 1],
        [39.984198, 116.319322, '2008-10-23 05:53:06', 1],
        [39.984224, 116.319402, '2008-10-23 05:53:11', 1],
        [39.984224, 116.319402, '2008-10-23 05:53:11', 2],
    ]
    return MoveDataFrame(data=rows)
コード例 #4
0
ファイル: trajectories.py プロジェクト: InsightLab/PyMove
def read_csv(filepath_or_buffer: FilePathOrBuffer,
             latitude: str = LATITUDE,
             longitude: str = LONGITUDE,
             datetime: str = DATETIME,
             traj_id: str = TRAJ_ID,
             type_: str = TYPE_PANDAS,
             n_partitions: int = 1,
             **kwargs):
    """
    Load a `csv` file and wrap its contents in a MoveDataFrame.

    Parameters
    ----------
    filepath_or_buffer : str or path object or file-like object
        Source of the csv data. It is handed unchanged to the underlying
        csv reader, so a string path, a URL, an os.PathLike object or an
        object with a read() method (file handle, StringIO) can be given.
    latitude : str, optional
        Name of the latitude column, by default the LATITUDE constant
    longitude : str, optional
        Name of the longitude column, by default the LONGITUDE constant
    datetime : str, optional
        Name of the datetime column, by default the DATETIME constant
    traj_id : str, optional
        Name of the trajectory id column, by default the TRAJ_ID constant
    type_ : str, optional
        Type of MoveDataFrame to build, by default the TYPE_PANDAS constant
    n_partitions : int, optional
        Number of partitions for a DaskMoveDataFrame, by default 1
    **kwargs : Pandas read_csv arguments
        Extra keyword arguments forwarded to the csv reader, see
        https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html?highlight=read_csv#pandas.read_csv

    Returns
    -------
    MoveDataFrameAbstract subclass
        Trajectory data

    Examples
    --------
    >>> from pymove.utils.trajectories import read_csv
    >>> move_df = read_csv('geolife_sample.csv')
    >>> move_df.head()
              lat          lon              datetime  id
    0   39.984094   116.319236   2008-10-23 05:53:05   1
    1   39.984198   116.319322   2008-10-23 05:53:06   1
    2   39.984224   116.319402   2008-10-23 05:53:11   1
    3   39.984211   116.319389   2008-10-23 05:53:16   1
    4   39.984217   116.319422   2008-10-23 05:53:21   1
    >>> type(move_df)
    <class 'pymove.core.pandas.PandasMoveDataFrame'>
    """
    raw_frame = _read_csv(filepath_or_buffer, **kwargs)

    # MoveDataFrame takes the column names positionally, in this order.
    column_names = (latitude, longitude, datetime, traj_id)
    return MoveDataFrame(raw_frame, *column_names, type_, n_partitions)
コード例 #5
0
def test_map_matching_node():
    """Check the frame left behind by ``map_matching_node``.

    ``map_matching_node`` is called for its effect on the MoveDataFrame
    (its return value is ignored); afterwards the frame must equal the
    expected coordinates plus a 'geometry' column of points.
    """
    trajectory = MoveDataFrame(data=dict_data)

    map_matching_node(trajectory)

    # One entry per expected row: lat, lon, timestamp and the (x, y)
    # coordinates of the geometry point.
    snapped = [
        (-3.779240, -38.678747, '2008-06-12 12:00:50',
         (-38.6787469, -3.7792405)),
        (-3.779240, -38.678747, '2008-06-12 12:00:56',
         (-38.6787469, -3.7792405)),
        (-3.778692, -38.678440, '2008-06-12 12:01:01',
         (-38.6784397, -3.7786924)),
        (-3.778692, -38.678440, '2008-06-12 12:01:06',
         (-38.6784397, -3.7786924)),
    ]
    rows = [
        [1, lat, lon, Timestamp(moment), Point(*xy)]
        for lat, lon, moment, xy in snapped
    ]

    expected = DataFrame(
        data=rows,
        columns=['id', 'lat', 'lon', 'datetime', 'geometry'],
        index=[0, 1, 2, 3],
    )

    assert_frame_equal(trajectory, expected)

    assert trajectory.len() == 4
コード例 #6
0
ファイル: trajectories.py プロジェクト: JoaoCarabetta/PyMove
def read_csv(filepath_or_buffer: FilePathOrBuffer,
             latitude: Optional[Text] = LATITUDE,
             longitude: Optional[Text] = LONGITUDE,
             datetime: Optional[Text] = DATETIME,
             traj_id: Optional[Text] = TRAJ_ID,
             type_: Optional[Text] = TYPE_PANDAS,
             n_partitions: Optional[int] = 1,
             **kwargs):
    """
    Load a .csv file and wrap its contents in the structure supported by
    PyMove.

    Parameters
    ----------
    filepath_or_buffer : str or path object or file-like object
        Source of the csv data. It is handed unchanged to the underlying
        csv reader, so a string path, a URL, an os.PathLike object or an
        object with a read() method (file handle, StringIO) can be given.
    latitude : str, optional
        Name of the latitude column, by default the LATITUDE constant
    longitude : str, optional
        Name of the longitude column, by default the LONGITUDE constant
    datetime : str, optional
        Name of the datetime column, by default the DATETIME constant
    traj_id : str, optional
        Name of the trajectory id column, by default the TRAJ_ID constant
    type_ : str, optional
        Type of MoveDataFrame to build, by default the TYPE_PANDAS constant
    n_partitions : int, optional
        Number of partitions for a DaskMoveDataFrame, by default 1
    **kwargs : Pandas read_csv arguments
        Extra keyword arguments forwarded to the csv reader, see
        https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html?highlight=read_csv#pandas.read_csv

    Returns
    -------
    MoveDataFrameAbstract subclass
        Trajectory data

    """
    loaded = _read_csv(filepath_or_buffer, **kwargs)

    # The column names are passed positionally, in the order expected by
    # MoveDataFrame.
    return MoveDataFrame(
        loaded, latitude, longitude, datetime, traj_id, type_, n_partitions
    )
コード例 #7
0
def test_validate_columns():
    """Check ``MoveDataFrame.validate_move_data_frame`` on good and bad data.

    The first case verifies that validation converts the columns of the
    frame it is given to the expected dtypes. The remaining cases verify
    that invalid frames are rejected with the expected exception type:

    - a frame without a datetime column -> KeyError
    - a non-numeric latitude value -> ValueError
    - a datetime string that cannot be parsed -> ParserError
    """
    df = DataFrame(data=[[0, 0, '01-01-2020', 0]],
                   columns=['lat', 'lon', 'datetime', 'id'])
    MoveDataFrame.validate_move_data_frame(df)

    # The dtypes are read from ``df`` itself, so validation is expected to
    # have converted the frame in place.
    expected = Series(
        data=['float64', 'float64', '<M8[ns]', 'int64'],
        index=['lat', 'lon', 'datetime', 'id'],
        dtype='object',
        name=None,
    )
    assert_series_equal(df.dtypes, expected)

    # Each check keeps only the call under test inside ``try``; the failure
    # is raised from ``else`` and names the exception that was expected.
    df = DataFrame(data=[[0, 0]], columns=['lat', 'lon'])

    try:
        MoveDataFrame.validate_move_data_frame(df)
    except KeyError:
        pass
    else:
        raise AssertionError('KeyError not raised by MoveDataFrame')

    df = DataFrame(data=[['a', 0, '01-01-2020']],
                   columns=['lat', 'lon', 'datetime'])

    try:
        MoveDataFrame.validate_move_data_frame(df)
    except ValueError:
        pass
    else:
        raise AssertionError('ValueError not raised by MoveDataFrame')

    df = DataFrame(data=[[0, 0, '0']], columns=['lat', 'lon', 'datetime'])

    try:
        MoveDataFrame.validate_move_data_frame(df)
    except ParserError:
        pass
    else:
        raise AssertionError('ParserError not raised by MoveDataFrame')
コード例 #8
0
def test_has_columns():
    """Check that ``has_columns`` requires lat, lon and datetime columns."""
    complete = DataFrame(columns=['lat', 'lon', 'datetime'])
    assert MoveDataFrame.has_columns(complete)

    # 'time' is not accepted in place of 'datetime'.
    wrong_name = DataFrame(columns=['lat', 'lon', 'time'])
    assert not MoveDataFrame.has_columns(wrong_name)
コード例 #9
0
def test_format_labels():
    """Check the rename mapping built by ``MoveDataFrame.format_labels``.

    The four positional arguments are expected to map, in order, to the
    standard names 'id', 'lat', 'lon' and 'datetime'.
    """
    labels = MoveDataFrame.format_labels('col1', 'col2', 'col3', 'col4')

    expected = {
        'col1': 'id',
        'col3': 'lon',
        'col2': 'lat',
        'col4': 'datetime',
    }
    assert_equal(labels, expected)
コード例 #10
0
    def __init__(
        self,
        data: Union[DataFrame, List, Dict],
        latitude: Optional[Text] = LATITUDE,
        longitude: Optional[Text] = LONGITUDE,
        datetime: Optional[Text] = DATETIME,
        traj_id: Optional[Text] = TRAJ_ID,
        n_partitions: Optional[int] = 1,
    ):
        """
        Build a dask-backed MoveDataFrame from raw trajectory data.

        The input is turned into a pandas DataFrame when needed, its columns
        are renamed to the PyMove standard names, the result is validated
        and finally stored as a dask dataframe. The following attributes
        are set:

        - self._data : Represents trajectory data.
        - self._type : Represents the type of layer below the data structure.
        - self.last_operation : Represents the last operation performed.

        Parameters
        ----------
        data : dict, list, numpy array or pandas.core.DataFrame
            Input trajectory data. For a list or array, the first four
            columns are named latitude, longitude, datetime and trajectory
            id; any further column is named by its integer position.
        latitude : str, optional, default 'lat'.
            Represents column name latitude.
        longitude : str, optional, default 'lon'.
            Represents column name longitude.
        datetime : str, optional, default 'datetime'.
            Represents column name datetime.
        traj_id : str, optional, default 'id'.
            Represents column name trajectory id.
        n_partitions : int, optional, default 1.
            Number of partitions of the dask dataframe.

        Raises
        ------
        AttributeError
            If the renamed data does not have the required columns.
        KeyError, ValueError, ParserError
            May propagate from ``validate_move_data_frame`` when the data
            can't be validated or converted (assumption based on its
            documented contract - confirm against that method).

        """

        if isinstance(data, dict):
            data = pd.DataFrame.from_dict(data)
        elif isinstance(data, (list, np.ndarray)) and len(data) >= 4:
            # NOTE(review): len(data) counts rows, not columns, so shorter
            # lists/arrays skip this conversion - kept as in the original,
            # confirm whether a column-count check was intended.
            standard_names = [LATITUDE, LONGITUDE, DATETIME, TRAJ_ID]
            n_columns = len(data[0])
            # Columns beyond the four standard ones are labelled with their
            # integer position. The previous loop tried to do this by
            # catching KeyError, but indexing a list out of range raises
            # IndexError, so inputs with extra columns crashed.
            extra_names = list(range(len(standard_names), n_columns))
            data = pd.DataFrame(data, columns=standard_names + extra_names)

        mapping_columns = MoveDataFrame.format_labels(
            traj_id, latitude, longitude, datetime
        )
        dsk = data.rename(columns=mapping_columns)

        if MoveDataFrame.has_columns(dsk):
            MoveDataFrame.validate_move_data_frame(dsk)
            self._data = dask.dataframe.from_pandas(
                dsk, npartitions=n_partitions
            )
            self._type = TYPE_DASK
            self.last_operation = None
        else:
            raise AttributeError(
                'Couldn\'t instantiate MoveDataFrame because data has missing columns.'
            )
コード例 #11
0
]


def _default_move_df():
    """Return a small MoveDataFrame fixture with four points.

    Each row is passed as [latitude, longitude, datetime string, id];
    the first three rows use id 1 and the last one id 2.
    """
    sample_rows = [
        [39.984094, 116.319236, '2008-10-23 05:53:05', 1],
        [39.984198, 116.319322, '2008-10-23 05:53:06', 1],
        [39.984224, 116.319402, '2008-10-23 05:53:11', 1],
        [39.984224, 116.319402, '2008-10-23 05:53:11', 2],
    ]
    return MoveDataFrame(data=sample_rows)


# Module-level MoveDataFrame built once from ``list_data`` and used by the
# test below. The column arguments are given as integers (0-3) instead of
# column names.
# NOTE(review): presumably these are the positional column labels of
# ``list_data`` - confirm against the fixture definition.
move_df = MoveDataFrame(
    data=list_data,
    latitude=0,
    longitude=1,
    datetime=2,
    traj_id=3,
)


def test_generate_distances():

    move_distances = generate_distances(move_df)

    cols = [
        'lat', 'lon', 'datetime', 'id', 'edgeDistance',
        'distFromTrajStartToCurrPoint'
    ]

    expected = DataFrame(data=[[
コード例 #12
0
def read_csv(
    filename,
    latitude=LATITUDE,
    longitude=LONGITUDE,
    datetime=DATETIME,
    traj_id=TRAJ_ID,
    type_=TYPE_PANDAS,
    n_partitions=1,
    sep=',',
    encoding='utf-8',
    header='infer',
    names=None,
    index_col=None,
    usecols=None,
    dtype=None,
    nrows=None,
):
    """
    Read a .csv file with pandas and wrap the result in the structure
    supported by PyMove.

    Parameters
    ----------
    filename : String.
        Path of the csv file, passed as the first argument of
        ``pandas.read_csv``.
    latitude : String, optional, default 'lat'.
        Represents the column name of feature latitude.
    longitude : String, optional, default 'lon'.
        Represents the column name of feature longitude.
    datetime : String, optional, default 'datetime'.
        Represents the column name of feature datetime. This column is
        also the one parsed as dates while reading the file.
    traj_id : String, optional, default 'id'.
        Represents the column name of feature id trajectory.
    type_ : String, optional, default 'pandas'.
        Represents the type of the MoveDataFrame.
    n_partitions : int, optional, default 1.
        Represents number of partitions for DaskMoveDataFrame.
    sep : String, optional, default ','.
        Delimiter to use.
    encoding : String, optional, default 'utf-8'.
        Encoding to use for UTF when reading/writing.
    header : int, list of int, default 'infer'
        Row number(s) to use as the column names, and the start of the
        data. Default behavior is to infer the column names: if no names
        are passed the behavior is identical to header=0 and column names
        are inferred from the first line of the file, if column names are
        passed explicitly then the behavior is identical to header=None.
    names : array-like, optional
        List of column names to use. If the file contains a header row,
        then you should explicitly pass header=0 to override the column
        names. Duplicates in this list are not allowed.
    index_col : int, str, sequence of int / str, or False, default None
        Column(s) to use as the row labels of the DataFrame, either given
        as string name or column index.
        If a sequence of int / str is given, a MultiIndex is used.
    usecols : list-like or callable, optional, default None
        Return a subset of the columns. If list-like, all elements must
        either be positional (i.e. integer indices into the document
        columns) or strings that correspond to column names provided
        either by the user in names or inferred from the document header
        row(s).
    dtype : Type name or dict of column -> type, optional, default None
        Data type for data or columns.
        E.g. {'a': np.float64, 'b': np.int32, 'c': 'Int64'}
        Use str or object together with suitable na_values settings to
        preserve and not interpret dtype.
    nrows : int, optional, default None
        Number of rows of file to read. Useful for reading pieces of
        large files.

    Returns
    -------
    pymove.core.MoveDataFrameAbstract subclass.
        Trajectory data.

    """
    # Options forwarded to pandas.
    reader_options = {
        'sep': sep,
        'encoding': encoding,
        'header': header,
        'names': names,
        'parse_dates': [datetime],
        'index_col': index_col,
        'usecols': usecols,
        'dtype': dtype,
        'nrows': nrows,
    }
    frame = pd.read_csv(filename, **reader_options)

    return MoveDataFrame(
        frame, latitude, longitude, datetime, traj_id, type_, n_partitions
    )
コード例 #13
0
def test_map_matching_edge():
    """Check the frame left behind by ``map_matching_edge``.

    ``map_matching_edge`` is called for its effect on the MoveDataFrame
    (its return value is ignored); afterwards the frame must hold the
    expected coordinates plus 'edge' and 'geometry' columns. All four
    expected rows share the same edge and the same line geometry.
    """
    trajectory = MoveDataFrame(data=dict_data)

    map_matching_edge(trajectory)

    # Edge and line coordinates common to every expected row.
    edge_nodes = (3971291384, 7625732459)
    edge_coords = [
        (-38.6784397, -3.7786924),
        (-38.6784773, -3.7787981),
        (-38.6785128, -3.7788737),
        (-38.678547, -3.7789333),
        (-38.6787079, -3.7791822),
        (-38.6787469, -3.7792405),
    ]

    # lat, lon and timestamp of each expected row.
    positions = [
        (-3.779936, -38.67921, '2008-06-12 12:00:50'),
        (-3.779240, -38.678747, '2008-06-12 12:00:56'),
        (-3.778692, -38.67844, '2008-06-12 12:01:01'),
        (-3.778191, -38.678071, '2008-06-12 12:01:06'),
    ]
    rows = [
        [1, lat, lon, Timestamp(moment), edge_nodes, LineString(edge_coords)]
        for lat, lon, moment in positions
    ]

    expected = DataFrame(
        data=rows,
        columns=['id', 'lat', 'lon', 'datetime', 'edge', 'geometry'],
        index=[0, 1, 2, 3],
    )

    assert_frame_equal(trajectory, expected)

    assert trajectory.len() == 4