def _helper_test_base_writer(data, metadata, metadata_is_model):
    """Write data with BaseFileWriter and verify a read/metadata round-trip."""
    import json
    import os
    import numpy as np
    from pydtk.io import BaseFileReader, BaseFileWriter
    from pydtk.models import MetaDataModel

    csv_path = metadata['path']
    metadata_path = csv_path + '.json'
    if metadata_is_model:
        metadata = MetaDataModel(metadata)

    # Remove leftovers from previous runs
    if os.path.isfile(csv_path):
        os.remove(csv_path)
    if os.path.isfile(metadata_path):
        os.remove(metadata_path)

    writer = BaseFileWriter()
    writer.write(data=data, metadata=metadata)
    reader = BaseFileReader()
    timestamps, new_data, columns = reader.read(metadata=metadata)
    np.testing.assert_array_equal(data, new_data)

    with open(metadata_path, 'r') as fd:
        new_metadata = json.load(fd)

    # NOTE: json.dumps(MetaDataModel(metadata_dict).data) is not equal to
    #       json.dumps(metadata_dict)
    if metadata_is_model:
        metadata_comparable = json.dumps(metadata.data)
        new_metadata_comparable = json.dumps(MetaDataModel(new_metadata).data)
    else:
        metadata_comparable = json.dumps(metadata)
        new_metadata_comparable = json.dumps(new_metadata)

    if metadata_comparable != new_metadata_comparable:
        raise ValueError(
            'Saving metadata failed! (source and destination do not match)')
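# Usage sketch for the round-trip helper above. The metadata layout here is an
# assumption for illustration (only 'path' is required by the helper itself);
# the path and the content spec are hypothetical and should be adapted to an
# actual CSV model in this test suite.
#
#   import numpy as np
#   data = np.array([[0.0, 1.0], [0.1, 2.0]])
#   metadata = {'path': 'test/records/sample_writer_test/example.csv',
#               'contents': {'test': {'columns': ['value']}}}
#   _helper_test_base_writer(data, metadata, metadata_is_model=False)
#   _helper_test_base_writer(data, metadata, metadata_is_model=True)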
def _test_v2_db_statistic():
    """Run the v2 DB statistics test."""
    import pandas as pd
    from pydtk.io import BaseFileReader
    from pydtk.statistics import BaseStatisticCalculation

    path = 'test/records/B05_17000000010000000829/data/records.bag'
    reader = BaseFileReader()
    timestamps, data, columns = reader.read(path=path, contents='/vehicle/acceleration')

    target_span = 0.3
    calculator = BaseStatisticCalculation(target_span)
    stat_df = calculator.statistic_tables(timestamps, data, columns)
    assert isinstance(stat_df, pd.core.frame.DataFrame)

    from pydtk.db.v2 import TimeSeriesDBHandler
    from requests.exceptions import ConnectionError
    try:
        db_handler = TimeSeriesDBHandler(df_name='test_statistics_span_0.3')
        db_handler.df = stat_df
        db_handler.save()
    except ConnectionError as e:
        print(str(e))
def _test_v3_db_statistic():
    """Run the v3 DB statistics test."""
    from cassandra.cluster import NoHostAvailable
    import pandas as pd
    from pydtk.io import BaseFileReader
    from pydtk.statistics import BaseStatisticCalculation

    path = 'test/records/B05_17000000010000000829/data/records.bag'
    reader = BaseFileReader()
    timestamps, data, columns = reader.read(path=path, contents='/vehicle/acceleration')

    target_span = 0.3
    calculator = BaseStatisticCalculation(target_span)
    stat_df = calculator.statistic_tables(timestamps, data, columns)
    assert isinstance(stat_df, pd.core.frame.DataFrame)

    from pydtk.db import V3TimeSeriesCassandraDBHandler
    try:
        db_handler = V3TimeSeriesCassandraDBHandler(
            df_name='test_statistics_span_0')
        db_handler.df = stat_df
        db_handler.save()
    except NoHostAvailable as e:
        print(str(e))
def test_v3_db_statistic_sqlite_2():
    """Run the v3 DB statistics test."""
    import pandas as pd
    from pydtk.io import BaseFileReader
    from pydtk.statistics import BaseStatisticCalculation
    from pydtk.db import V3DBHandler as DBHandler

    path = 'test/records/B05_17000000010000000829/data/records.bag'
    reader = BaseFileReader()
    timestamps, data, columns = reader.read(path=path, contents='/vehicle/acceleration')
    # timestamps, data, columns = reader.read(path=path, contents='/vehicle/analog/speed_pulse')

    target_span = 0.3
    calculator = BaseStatisticCalculation(target_span)
    stat_df = calculator.statistic_tables(timestamps, data, columns)
    stat_df['record_id'] = 'B05_17000000010000000829'
    assert isinstance(stat_df, pd.core.frame.DataFrame)

    db_handler = DBHandler(
        database_id='test',
        span=0.3,
        db_class='statistics',
        db_engine='sqlite',
        db_host='test/test_statistics.db',
        db_username='',
        db_password='',
        db_name='',
        read_on_init=False,
    )
    db_handler.df = stat_df
    db_handler.save(remove_duplicates=True)
def test_base_reader():
    """Run the base reader test."""
    import numpy as np
    from pydtk.io import BaseFileReader

    path = 'test/records/annotation_model_test/annotation_test.csv'
    reader = BaseFileReader()
    timestamps, data, columns = reader.read(path=path)
    assert isinstance(data, np.ndarray)
def test_base_reader_non_ndarray():
    """Run the base reader test."""
    from pydtk.io import BaseFileReader

    path = 'test/records/json_model_test/json_test.json'
    reader = BaseFileReader()
    timestamps, data, columns = reader.read(path=path, as_ndarray=False)
    assert isinstance(data, dict)
def test_base_reader_rosbag_can():
    """Run the base reader test."""
    import numpy as np
    from pydtk.io import BaseFileReader

    path = 'test/records/rosbag_model_test/data/records.bag'
    reader = BaseFileReader()
    timestamps, data, columns = reader.read(path=path, contents='/vehicle/can_raw')
    assert isinstance(data, np.ndarray)
def test_separated_data():
    """Run the base reader test."""
    import numpy as np
    from pydtk.models import MetaDataModel
    from pydtk.io import BaseFileReader

    metadata_path = 'test/records/sample/separated_data/records.bag.json'
    metadata = MetaDataModel()
    metadata.load(metadata_path)
    reader = BaseFileReader()
    timestamps, data, columns = reader.read(
        metadata=metadata, contents='/points_concat_downsampled')
    assert isinstance(data, np.ndarray)
def test_load_from_metadata_dict():
    """Run the base reader test."""
    import numpy as np
    from pydtk.models import MetaDataModel
    from pydtk.io import BaseFileReader

    metadata_path = 'test/records/rosbag_model_test/data/records.bag.json'
    metadata = MetaDataModel()
    metadata.load(metadata_path)
    metadata = metadata.data
    reader = BaseFileReader()
    timestamps, data, columns = reader.read(
        metadata=metadata, contents='/vehicle/analog/speed_pulse')
    assert isinstance(data, np.ndarray)
def test_base_statistic_calculation():
    """Run the base statistic calculation test."""
    import pandas as pd
    from pydtk.io import BaseFileReader
    from pydtk.statistics import BaseStatisticCalculation

    path = 'test/records/B05_17000000010000000829/data/records.bag'
    reader = BaseFileReader()
    timestamps, data, columns = reader.read(path=path, contents='/vehicle/acceleration')

    target_span = 0.3
    calculator = BaseStatisticCalculation(target_span)
    stat_df = calculator.statistic_tables(timestamps, data, columns)
    assert isinstance(stat_df, pd.core.frame.DataFrame)
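# Minimal sketch of BaseStatisticCalculation on synthetic arrays, decoupled from
# any record file. It assumes statistic_tables accepts plain ndarrays shaped like
# the (timestamps, data, columns) triple returned by BaseFileReader.read; the
# column names are made up for illustration.
def _example_statistic_tables_synthetic():
    import numpy as np
    from pydtk.statistics import BaseStatisticCalculation

    timestamps = np.arange(0.0, 3.0, 0.1)      # 30 samples spanning 3 seconds
    data = np.random.rand(len(timestamps), 2)  # two synthetic channels
    columns = ['ch_a', 'ch_b']                 # hypothetical column names

    calculator = BaseStatisticCalculation(0.3)  # aggregate over 0.3-second spans
    stat_df = calculator.statistic_tables(timestamps, data, columns)
    print(stat_df.head())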
def _test_custom_df_v2():
    """Create a custom dataframe."""
    from pydtk.db import V2BaseDBHandler, V2MetaDBHandler
    from pydtk.io import BaseFileReader, NoModelMatchedError
    import pandas as pd

    meta_db = V2MetaDBHandler(
        db_engine='sqlite', db_host='test/test_v2.db', base_dir_path='test')
    reader = BaseFileReader()
    # meta_db.read(where='tags like "%gnss%"')

    try:
        for sample in meta_db:
            print('loading content "{0}" from file "{1}"'.format(
                sample['contents'], sample['path']))
            try:
                # Initialize DB for storing features
                feats_db = V2BaseDBHandler(
                    db_engine='sqlite', db_host='test/test.db',
                    df_name=sample['contents'], read_on_init=False)

                # Load data from file
                timestamps, data, columns = reader.read(**sample)

                # Create DataFrame
                timestamps_df = pd.Series(timestamps, name='timestamp')
                data_df = pd.DataFrame(data, columns=columns)
                df = pd.concat([timestamps_df, data_df], axis=1)

                # Add to DB
                feats_db.df = df
                feats_db.save(remove_duplicates=True)
            except NoModelMatchedError:
                continue
            except Exception as e:
                print('Failed to process content "{0}" from file "{1}"'.format(
                    sample['contents'], sample['path']))
                print(e)
                continue
    except EOFError:
        pass
def read(
    file: str,
    content: str = None,
):
    """Read a given file.

    Args:
        file (str): Path to a file
        content (str): Content in the file to read

    """
    import pprint
    from pydtk.io import BaseFileReader

    reader = BaseFileReader()
    timestamps, data, columns = reader.read(path=file, contents=content, as_ndarray=False)
    pprint.pprint(data)
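# Usage sketch for `read` above (direct function calls; any CLI wiring, e.g. a
# command-line entry point, is assumed to live elsewhere in this module):
#
#   read('test/records/json_model_test/json_test.json')
#   read('test/records/rosbag_model_test/data/records.bag',
#        content='/vehicle/can_raw')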
def _test_db_and_io():
    """Load DB and load file."""
    from pydtk.db import V1MetaDBHandler
    from pydtk.io import BaseFileReader, NoModelMatchedError

    handler = V1MetaDBHandler('test/meta_db.arrow')
    reader = BaseFileReader()

    try:
        for sample in handler:
            print(sample)
            try:
                # BaseFileReader.read returns a (timestamps, data, columns) triple
                timestamps, data, columns = reader.read(**sample)
                assert len(timestamps) == len(data)
            except NoModelMatchedError as e:
                print(str(e))
                continue
    except EOFError:
        pass
def is_available(
    file: str,
    content: str = None,
):
    """Test if available models exist against the given file.

    Args:
        file (str): Path to a file
        content (str): Content in the file to read

    """
    from pydtk.io import BaseFileReader
    from pydtk.io import NoModelMatchedError

    reader = BaseFileReader()
    try:
        _ = reader.read(path=file, contents=content, as_ndarray=False)
        print('True')
    except NoModelMatchedError:
        print('False')
def _test_db_and_io_v2():
    """Load DB and load file."""
    from pydtk.db import V2MetaDBHandler
    from pydtk.io import BaseFileReader, NoModelMatchedError

    handler = V2MetaDBHandler(
        db_engine='sqlite', db_host='test/test_v2.db', base_dir_path='test')
    reader = BaseFileReader()

    try:
        for sample in handler:
            print('loading content "{0}" from file "{1}"'.format(
                sample['contents'], sample['path']))
            try:
                timestamps, data, columns = reader.read(**sample)
                assert len(timestamps) == len(data)
            except NoModelMatchedError as e:
                print(str(e))
                continue
    except EOFError:
        pass
def test_load_from_db():
    """Load from database."""
    from pydtk.db import V4DBHandler as DBHandler
    from pydtk.io import BaseFileReader

    record_id = 'rosbag_model_test'
    target_content = '/vehicle/acceleration'
    start_timestamp = 1517463303.0
    end_timestamp = 1517463303.5

    # Get DBHandler
    handler = DBHandler(
        db_class='meta',
        db_engine='tinymongo',
        db_host='test/test_v4',
        base_dir_path='/opt/pydtk/test',
        read_on_init=False,
    )
    handler.read(pql='record_id == "{}"'.format(record_id))

    # Get the corresponding metadata
    for metadata in handler:
        for content in metadata['contents'].keys():
            if content != target_content:
                continue
            metadata.update({
                'start_timestamp': start_timestamp,
                'end_timestamp': end_timestamp
            })

    # Get FileReader
    reader = BaseFileReader()
    timestamps, data, columns = reader.read(metadata)

    assert len(timestamps) > 0
    assert len(data) > 0
    assert len(columns) > 0
def test_base_statistic_calculation_with_sync_timestamp():
    """Run the base statistic calculation test."""
    from pydtk.io import BaseFileReader
    from pydtk.statistics import BaseStatisticCalculation

    path = 'test/records/B05_17000000010000000829/data/records.bag'
    reader = BaseFileReader()
    timestamps, data, columns = reader.read(path=path, contents='/vehicle/acceleration')

    target_span = 0.3
    calculator = BaseStatisticCalculation(target_span, sync_timestamps=False)
    stat_df = calculator.statistic_tables(timestamps, data, columns)
    result = stat_df.to_dict(orient='records')

    calculator_sync = BaseStatisticCalculation(target_span, sync_timestamps=True)
    stat_df_sync = calculator_sync.statistic_tables(timestamps, data, columns)
    result_sync = stat_df_sync.to_dict(orient='records')

    # With sync_timestamps=True, each timestamp should be snapped down to the
    # span grid (e.g. with span 0.3, a timestamp of 1.0 becomes 0.9)
    for without_sync, with_sync in zip(result, result_sync):
        assert without_sync['timestamp'] // target_span * target_span \
            == with_sync['timestamp']
def main(database_id,
         q_content,
         span=60.0,
         meta_db_base_dir=None,
         meta_db_engine=None,
         meta_db_host=None,
         meta_db_username=None,
         meta_db_password=None,
         meta_db_name=None,
         output_db_engine=None,
         output_db_host=None,
         output_db_username=None,
         output_db_password=None,
         output_db_name=None,
         verbose=False):
    """Make a statistics dataframe table.

    Args:
        database_id (str): ID of the target database (e.g. "Driving Behavior Database")
        q_content (str): Content name of query
        span (float): Size of divided frame [sec]
        meta_db_base_dir (str): Base directory of path
        meta_db_engine (str): Database engine of metadata
        meta_db_host (str): Host of the metadata database
        meta_db_username (str): Username for the metadata database
        meta_db_password (str): Password for the metadata database
        meta_db_name (str): Database name of metadata
        output_db_engine (str): Database engine for storing statistics data
        output_db_host (str): Host of the statistics database
        output_db_username (str): Username for the statistics database
        output_db_password (str): Password for the statistics database
        output_db_name (str): Database name of statistics data
        verbose (bool): Verbose mode

    """
    import logging
    import time
    from tqdm import tqdm
    from pydtk.io import BaseFileReader
    from pydtk.statistics import BaseStatisticCalculation
    # NOTE: assumed handler version; it matches the database_id/span signature used below
    from pydtk.db import V3DBHandler as DBHandler

    if verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.ERROR)

    t_n, t_b = time.time(), time.time()

    # Load meta DB
    meta_db_handler = DBHandler(db_class='meta',
                                db_engine=meta_db_engine,
                                db_host=meta_db_host,
                                db_name=meta_db_name,
                                db_username=meta_db_username,
                                db_password=meta_db_password,
                                database_id=database_id,
                                base_dir_path=meta_db_base_dir,
                                read_on_init=False)
    logging.info("Loading content: {}".format(q_content))
    meta_db_handler.read(where='contents like "{}"'.format(q_content))
    reader = BaseFileReader()
    calculator = BaseStatisticCalculation(span, sync_timestamps=True)
    t_n, t_p = time.time(), t_n
    logging.info("Loaded index and filtered files. ({0:.03f} secs)".format(t_n - t_p))

    # Initialize DB-Handler
    stat_db_handler = DBHandler(db_class='statistics',
                                db_engine=output_db_engine,
                                db_host=output_db_host,
                                db_name=output_db_name,
                                db_username=output_db_username,
                                db_password=output_db_password,
                                database_id=database_id,
                                span=span,
                                read_on_init=False)

    # Read data and write calculated data to DB
    def write_stat_to_db(item):
        # Load data from file and get statistical table
        timestamps, data, columns = reader.read(metadata=item)
        stat_df = calculator.statistic_tables(timestamps, data, columns)
        stat_df.insert(0, "record_id", item["record_id"])

        # Write to DB
        stat_db_handler.df = stat_df
        stat_db_handler.save()

    tqdm.pandas(desc="Load files, calculate and write")
    for sample in meta_db_handler:
        write_stat_to_db(sample)
    t_n, t_p = time.time(), t_n
    logging.info(
        "Calculated statistics and wrote to DB. ({0:.03f} secs)".format(t_n - t_p))

    logging.info("Done. (Total: {0:.03f} secs)".format(t_n - t_b))
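# Example invocation of `main` (values are illustrative; the engines, hosts and
# credentials must match an actual deployment):
#
#   main(database_id='Driving Behavior Database',
#        q_content='/vehicle/acceleration',
#        span=60.0,
#        meta_db_engine='sqlite',
#        meta_db_host='test/test_v2.db',
#        output_db_engine='sqlite',
#        output_db_host='test/test_statistics.db',
#        verbose=True)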