Example #1
0
    def _helper_test_base_writer(data, metadata, metadata_is_model):
        """Write data with BaseFileWriter and verify the round-trip.

        Args:
            data: Array-like payload to write and read back.
            metadata (dict): Metadata containing at least a 'path' key.
            metadata_is_model (bool): If True, wrap the metadata dict in a
                MetaDataModel before writing.

        Raises:
            ValueError: If the metadata read back does not match the input.

        """
        csv_path = metadata['path']
        metadata_path = csv_path + '.json'
        if metadata_is_model:
            metadata = MetaDataModel(metadata)

        # Remove leftovers from a previous run.
        if os.path.isfile(csv_path):
            os.remove(path=csv_path)
        # FIX: this previously re-checked `csv_path`, so a stale metadata
        # file was never deleted when the CSV file was already gone.
        if os.path.isfile(metadata_path):
            os.remove(path=metadata_path)

        writer = BaseFileWriter()
        writer.write(data=data, metadata=metadata)

        reader = BaseFileReader()
        timestamps, new_data, columns = reader.read(metadata=metadata)
        np.testing.assert_array_equal(data, new_data)

        with open(metadata_path, 'r') as fd:
            new_metadata = json.load(fd)

        # NOTE: json.dumps(MetaDataModel(metadata_dict).data) is not equal json.dumps(metadata_dict)
        if metadata_is_model:
            metadata_comparable = json.dumps(metadata.data)
            new_metadata_comparable = json.dumps(
                MetaDataModel(new_metadata).data)
        else:
            metadata_comparable = json.dumps(metadata)
            new_metadata_comparable = json.dumps(new_metadata)

        if metadata_comparable != new_metadata_comparable:
            raise ValueError(
                'Saving metadata failed! (src and dest is unmatch)')
Example #2
0
def _test_v2_db_statistic():
    """Run the v2 DB statistics test."""
    import pandas as pd
    from pydtk.io import BaseFileReader
    from pydtk.statistics import BaseStatisticCalculation

    bag_path = 'test/records/B05_17000000010000000829/data/records.bag'
    ts, values, cols = BaseFileReader().read(
        path=bag_path, contents='/vehicle/acceleration')

    # Aggregate into 0.3-second frames.
    stat_df = BaseStatisticCalculation(0.3).statistic_tables(ts, values, cols)
    assert isinstance(stat_df, pd.core.frame.DataFrame)

    from pydtk.db.v2 import TimeSeriesDBHandler
    from requests.exceptions import ConnectionError

    # Saving is best-effort: skip when the DB is unreachable.
    try:
        handler = TimeSeriesDBHandler(df_name='test_statistics_span_0.3')
        handler.df = stat_df
        handler.save()
    except ConnectionError as err:
        print(str(err))
Example #3
0
def _test_v3_db_statistic():
    """Run the v3 DB statistics test."""
    from cassandra.cluster import NoHostAvailable
    import pandas as pd
    from pydtk.io import BaseFileReader
    from pydtk.statistics import BaseStatisticCalculation

    bag_path = 'test/records/B05_17000000010000000829/data/records.bag'
    ts, values, cols = BaseFileReader().read(
        path=bag_path, contents='/vehicle/acceleration')

    # Aggregate into 0.3-second frames.
    stat_df = BaseStatisticCalculation(0.3).statistic_tables(ts, values, cols)
    assert isinstance(stat_df, pd.core.frame.DataFrame)

    from pydtk.db import V3TimeSeriesCassandraDBHandler

    # Saving is best-effort: skip when no Cassandra host is reachable.
    try:
        handler = V3TimeSeriesCassandraDBHandler(
            df_name='test_statistics_span_0')
        handler.df = stat_df
        handler.save()
    except NoHostAvailable as err:
        print(str(err))
Example #4
0
def test_v3_db_statistic_sqlite_2():
    """Run the v3 DB statistics test."""
    import pandas as pd
    from pydtk.io import BaseFileReader
    from pydtk.statistics import BaseStatisticCalculation
    from pydtk.db import V3DBHandler as DBHandler

    bag_path = 'test/records/B05_17000000010000000829/data/records.bag'
    ts, values, cols = BaseFileReader().read(
        path=bag_path, contents='/vehicle/acceleration')

    # Aggregate into 0.3-second frames and tag with the source record.
    stat_df = BaseStatisticCalculation(0.3).statistic_tables(ts, values, cols)
    stat_df['record_id'] = 'B05_17000000010000000829'
    assert isinstance(stat_df, pd.core.frame.DataFrame)

    # Persist into a local SQLite statistics DB.
    handler = DBHandler(
        database_id='test',
        span=0.3,
        db_class='statistics',
        db_engine='sqlite',
        db_host='test/test_statistics.db',
        db_username='',
        db_password='',
        db_name='',
        read_on_init=False,
    )
    handler.df = stat_df
    handler.save(remove_duplicates=True)
Example #5
0
def test_base_reader():
    """Run the base reader test."""
    import numpy as np
    from pydtk.io import BaseFileReader

    csv_path = 'test/records/annotation_model_test/annotation_test.csv'
    _, payload, _ = BaseFileReader().read(path=csv_path)
    assert isinstance(payload, np.ndarray)
Example #6
0
def test_base_reader_non_ndarray():
    """Run the base reader test."""
    from pydtk.io import BaseFileReader

    json_path = 'test/records/json_model_test/json_test.json'
    # as_ndarray=False keeps the native (dict) representation.
    _, payload, _ = BaseFileReader().read(path=json_path, as_ndarray=False)
    assert isinstance(payload, dict)
Example #7
0
def test_base_reader_rosbag_can():
    """Run the base reader test."""
    import numpy as np
    from pydtk.io import BaseFileReader

    bag_path = 'test/records/rosbag_model_test/data/records.bag'
    _, payload, _ = BaseFileReader().read(
        path=bag_path, contents='/vehicle/can_raw')
    assert isinstance(payload, np.ndarray)
Example #8
0
def test_separated_data():
    """Run the base reader test."""
    import numpy as np
    from pydtk.models import MetaDataModel
    from pydtk.io import BaseFileReader

    # Read via a metadata model instead of a direct path.
    meta = MetaDataModel()
    meta.load('test/records/sample/separated_data/records.bag.json')
    _, payload, _ = BaseFileReader().read(
        metadata=meta, contents='/points_concat_downsampled')
    assert isinstance(payload, np.ndarray)
Example #9
0
def test_load_from_metadata_dict():
    """Run the base reader test."""
    import numpy as np
    from pydtk.models import MetaDataModel
    from pydtk.io import BaseFileReader

    # Pass a plain dict (model.data) rather than the model itself.
    model = MetaDataModel()
    model.load('test/records/rosbag_model_test/data/records.bag.json')
    meta_dict = model.data
    _, payload, _ = BaseFileReader().read(
        metadata=meta_dict, contents='/vehicle/analog/speed_pulse')
    assert isinstance(payload, np.ndarray)
Example #10
0
def test_base_statistic_calculation():
    """Run the base statistic calculation test."""
    import pandas as pd
    from pydtk.io import BaseFileReader
    from pydtk.statistics import BaseStatisticCalculation

    bag_path = 'test/records/B05_17000000010000000829/data/records.bag'
    ts, values, cols = BaseFileReader().read(
        path=bag_path, contents='/vehicle/acceleration')

    # Aggregate into 0.3-second frames.
    stat_df = BaseStatisticCalculation(0.3).statistic_tables(ts, values, cols)
    assert isinstance(stat_df, pd.core.frame.DataFrame)
Example #11
0
def _test_custom_df_v2():
    """Create a custom dataframe."""
    from pydtk.db import V2BaseDBHandler, V2MetaDBHandler
    from pydtk.io import BaseFileReader, NoModelMatchedError
    import pandas as pd

    meta_db = V2MetaDBHandler(db_engine='sqlite',
                              db_host='test/test_v2.db',
                              base_dir_path='test')
    reader = BaseFileReader()

    try:
        for entry in meta_db:
            print('loading content "{0}" from file "{1}"'.format(
                entry['contents'], entry['path']))
            try:
                # One features-DB table per content name.
                feats_db = V2BaseDBHandler(db_engine='sqlite',
                                           db_host='test/test.db',
                                           df_name=entry['contents'],
                                           read_on_init=False)

                ts, values, cols = reader.read(**entry)

                # Assemble one table: timestamp column + data columns.
                frame = pd.concat(
                    [pd.Series(ts, name='timestamp'),
                     pd.DataFrame(values, columns=cols)],
                    axis=1)

                feats_db.df = frame
                feats_db.save(remove_duplicates=True)

            except NoModelMatchedError:
                # No model can read this content; skip it.
                continue
            except Exception as err:
                print('Failed to process content "{0}" from file "{1}"'.format(
                    entry['contents'], entry['path']))
                print(err)
                continue
    except EOFError:
        pass
Example #12
0
    def read(
        file: str,
        content: str = None,
    ):
        """Read a given file and pretty-print its contents.

        Args:
            file (str): Path to a file
            content (str): Content in the file to read

        """
        from pydtk.io import BaseFileReader

        _, loaded, _ = BaseFileReader().read(path=file,
                                             contents=content,
                                             as_ndarray=False)
        pprint.pprint(loaded)
Example #13
0
def _test_db_and_io():
    """Load DB and load file."""
    from pydtk.db import V1MetaDBHandler
    from pydtk.io import BaseFileReader, NoModelMatchedError

    handler = V1MetaDBHandler('test/meta_db.arrow')
    reader = BaseFileReader()

    try:
        for entry in handler:
            print(entry)
            try:
                ts, values = reader.read(**entry)
                assert len(ts) == len(values)
            except NoModelMatchedError as err:
                # No model can read this entry; report and move on.
                print(str(err))
                continue
    except EOFError:
        pass
Example #14
0
    def is_available(
        file: str,
        content: str = None,
    ):
        """Test if available models exist against the given file.

        Args:
            file (str): Path to a file
            content (str): Content in the file to read

        """
        from pydtk.io import BaseFileReader
        from pydtk.io import NoModelMatchedError

        # Print 'True' when some model can read the file, 'False' otherwise.
        try:
            BaseFileReader().read(path=file, contents=content, as_ndarray=False)
        except NoModelMatchedError:
            print('False')
        else:
            print('True')
Example #15
0
def _test_db_and_io_v2():
    """Load DB and load file."""
    from pydtk.db import V2MetaDBHandler
    from pydtk.io import BaseFileReader, NoModelMatchedError

    meta_db = V2MetaDBHandler(db_engine='sqlite',
                              db_host='test/test_v2.db',
                              base_dir_path='test')
    reader = BaseFileReader()

    try:
        for entry in meta_db:
            print('loading content "{0}" from file "{1}"'.format(
                entry['contents'], entry['path']))
            try:
                ts, values, _ = reader.read(**entry)
                assert len(ts) == len(values)
            except NoModelMatchedError as err:
                # No model can read this entry; report and move on.
                print(str(err))
                continue
    except EOFError:
        pass
Example #16
0
def test_load_from_db():
    """Load from database."""
    from pydtk.db import V4DBHandler as DBHandler
    from pydtk.io import BaseFileReader

    record_id = 'rosbag_model_test'
    target_content = '/vehicle/acceleration'
    start_timestamp = 1517463303.0
    end_timestamp = 1517463303.5

    # Query metadata for the target record only.
    handler = DBHandler(
        db_class='meta',
        db_engine='tinymongo',
        db_host='test/test_v4',
        base_dir_path='/opt/pydtk/test',
        read_on_init=False,
    )
    handler.read(pql='record_id == "{}"'.format(record_id))

    for metadata in handler:
        for content in metadata['contents']:
            if content != target_content:
                continue
            # Restrict the read to the requested time window.
            metadata.update({
                'start_timestamp': start_timestamp,
                'end_timestamp': end_timestamp
            })

            timestamps, data, columns = BaseFileReader().read(metadata)

            assert len(timestamps) > 0
            assert len(data) > 0
            assert len(columns) > 0
Example #17
0
def test_base_statistic_calculation_with_sync_timestamp():
    """Run the base statistic calculation test.

    Verifies that enabling ``sync_timestamps`` snaps each output
    timestamp down to a multiple of the target span.
    """
    from pydtk.io import BaseFileReader
    from pydtk.statistics import BaseStatisticCalculation

    path = 'test/records/B05_17000000010000000829/data/records.bag'
    reader = BaseFileReader()
    timestamps, data, columns = reader.read(path=path,
                                            contents='/vehicle/acceleration')

    target_span = 0.3
    calculator = BaseStatisticCalculation(target_span, sync_timestamps=False)
    stat_df = calculator.statistic_tables(timestamps, data, columns)
    # FIX: orient='record' relied on a deprecated pandas abbreviation
    # (removed in pandas 2.0); the valid value is 'records'.
    result = stat_df.to_dict(orient='records')

    calculator_sync = BaseStatisticCalculation(target_span,
                                               sync_timestamps=True)
    stat_df_sync = calculator_sync.statistic_tables(timestamps, data, columns)
    result_sync = stat_df_sync.to_dict(orient='records')

    for without_sync, with_sync in zip(result, result_sync):
        # A synced timestamp equals the unsynced one floored to the span.
        assert without_sync[
            'timestamp'] // target_span * target_span == with_sync['timestamp']
Example #18
0
def main(database_id,
         q_content,
         span=60.0,
         meta_db_base_dir=None,
         meta_db_engine=None,
         meta_db_host=None,
         meta_db_username=None,
         meta_db_password=None,
         meta_db_name=None,
         output_db_engine=None,
         output_db_host=None,
         output_db_username=None,
         output_db_password=None,
         output_db_name=None,
         verbose=False):
    """Make Statistics Dataframe Table.

    Args:
        database_id (str): ID of the target database (e.g. "Driving Behavior Database")
        q_content (str): Content name of query
        span (float): Size of divided frame[sec]
        meta_db_base_dir (str): base directory of path
        meta_db_engine (str): Database engine of metadata
        meta_db_host (str): HOST of database of metadata
        meta_db_username (str): Username for the database of metadata
        meta_db_password (str): Password for the database of metadata
        meta_db_name (str): Database name of metadata
        output_db_engine (str): Database engine for storing statistics data
        output_db_host (str): HOST of database of statistics data
        output_db_username (str): Username for the database of statistics data
        output_db_password (str): Password for the database of statistics data
        output_db_name (str): Database name of statistics data
        verbose (bool): Verbose mode

    """
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.ERROR)

    t_begin = time.time()
    t_prev = t_begin

    # Load meta DB and select only the requested content.
    meta_db_handler = DBHandler(db_class='meta',
                                db_engine=meta_db_engine,
                                db_host=meta_db_host,
                                db_name=meta_db_name,
                                db_username=meta_db_username,
                                db_password=meta_db_password,
                                database_id=database_id,
                                base_dir_path=meta_db_base_dir,
                                read_on_init=False)
    logging.info("Loading content: {}".format(q_content))
    meta_db_handler.read(where='contents like "{}"'.format(q_content))
    reader = BaseFileReader()
    calculator = BaseStatisticCalculation(span, sync_timestamps=True)
    t_now = time.time()
    logging.info("Loaded index and filtered files.({0:.03f} secs)".format(
        t_now - t_prev))
    t_prev = t_now

    # Output DB for the computed statistics.
    stat_db_handler = DBHandler(db_class='statistics',
                                db_engine=output_db_engine,
                                db_host=output_db_host,
                                db_name=output_db_name,
                                db_username=output_db_username,
                                db_password=output_db_password,
                                database_id=database_id,
                                span=span,
                                read_on_init=False)

    # Read each file, compute the statistics table, and persist it.
    tqdm.pandas(desc="Load files, calculate and write")
    for sample in meta_db_handler:
        timestamps, data, columns = reader.read(metadata=sample)
        stat_df = calculator.statistic_tables(timestamps, data, columns)
        stat_df.insert(0, "record_id", sample["record_id"])

        stat_db_handler.df = stat_df
        stat_db_handler.save()
    t_now = time.time()
    logging.info(
        "Calculated statistics and wrote to DB.({0:.03f} secs)".format(
            t_now - t_prev))

    logging.info("Done.(Total: {0:.03f} secs)".format(t_now - t_begin))