def _helper_test_base_writer(data, metadata, metadata_is_model):
    import json
    import os
    import numpy as np
    from pydtk.io import BaseFileReader, BaseFileWriter
    from pydtk.models import MetaDataModel

    csv_path = metadata['path']
    metadata_path = csv_path + '.json'
    if metadata_is_model:
        metadata = MetaDataModel(metadata)
    if os.path.isfile(csv_path):
        os.remove(csv_path)
    if os.path.isfile(metadata_path):
        os.remove(metadata_path)

    writer = BaseFileWriter()
    writer.write(data=data, metadata=metadata)
    reader = BaseFileReader()
    timestamps, new_data, columns = reader.read(metadata=metadata)
    np.testing.assert_array_equal(data, new_data)

    with open(metadata_path, 'r') as fd:
        new_metadata = json.load(fd)

    # NOTE: json.dumps(MetaDataModel(metadata_dict).data) is not necessarily
    # equal to json.dumps(metadata_dict), so normalize both sides the same way
    # before comparing
    if metadata_is_model:
        metadata_comparable = json.dumps(metadata.data)
        new_metadata_comparable = json.dumps(MetaDataModel(new_metadata).data)
    else:
        metadata_comparable = json.dumps(metadata)
        new_metadata_comparable = json.dumps(new_metadata)
    if metadata_comparable != new_metadata_comparable:
        raise ValueError('Saving metadata failed! (src and dest do not match)')

def write(self, metadata=None, data=None, model_kwargs=None, **kwargs):
    """Write a file which corresponds to the given metadata.

    Args:
        metadata (dict or MetaDataModel): metadata of the data to save
        data (numpy array): data
        model_kwargs (dict): kwargs to pass to the selected model

    Returns:
        void

    """
    if model_kwargs is None:
        model_kwargs = {}

    # Check that metadata is valid
    if metadata is None:
        raise ValueError('Metadata must be specified')
    if type(metadata) is dict:
        if 'path' not in metadata.keys():
            raise ValueError('Metadata must have path key')
        metadata = MetaDataModel(metadata)
    elif type(metadata) is MetaDataModel:
        pass
    else:
        raise ValueError('Type of metadata must be dict or MetaDataModel')
    metadata.save(metadata.data['path'] + metadata._file_extensions[0])

    # Select a suitable model and save data
    self.model = self._select_model(metadata)
    self.model = self.model(metadata=metadata, data=data, **model_kwargs)
    self.model.save()

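# A minimal usage sketch for BaseFileWriter.write() above. The output path
# and 'contents' layout are hypothetical and only illustrate the metadata
# dict that write() accepts; only the 'path' key is mandatory per the checks
# in write() itself.
def _example_write_usage():
    import numpy as np
    from pydtk.io import BaseFileWriter

    data = np.zeros((10, 2))
    metadata = {
        'path': '/tmp/example.csv',  # hypothetical output path
        'contents': {'example': {'columns': ['a', 'b']}},  # hypothetical layout
    }

    # write() wraps the dict in a MetaDataModel, saves the '<path>.json'
    # sidecar, then selects a model from the metadata and saves the data
    writer = BaseFileWriter()
    writer.write(data=data, metadata=metadata)
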
def test_forecast_model():
    """Run the ForecastCsvModel test."""
    meta_path = 'test/records/forecast_model_test/forecast_test.csv.json'

    from pydtk.models import MetaDataModel
    from pydtk.models.csv import ForecastCsvModel
    import numpy as np

    # load metadata
    metadata = MetaDataModel()
    metadata.load(meta_path)

    # load
    forecast_model = ForecastCsvModel(metadata=metadata)
    forecast_model.load()
    assert isinstance(forecast_model.to_ndarray(), np.ndarray)
    forecast_model.save('/tmp/test_forecast.csv')

    from datetime import datetime
    strp_format = "%Y/%m/%d %H:%M:%S"
    forecast_model.load(
        start_timestamp=datetime.strptime("2020/11/03 00:30:00", strp_format).timestamp(),
        end_timestamp=datetime.strptime("2020/11/03 01:20:00", strp_format).timestamp(),
    )
    assert isinstance(forecast_model.to_ndarray(), np.ndarray)
    forecast_model.save("/tmp/test_forecast_query.csv")

def test_create_db_v3_with_env_var():
    """Create DB of records directory."""
    import os
    from pydtk.db import V3DBHandler
    from pydtk.models import MetaDataModel

    # Set environment variables
    os.environ['PYDTK_META_DB_ENGINE'] = 'sqlite'
    os.environ['PYDTK_META_DB_HOST'] = 'test/test_v3_env.db'

    handler = V3DBHandler(db_class='meta', base_dir_path='test')
    paths = [
        'test/records/016_00000000030000000240/data/camera_01_timestamps.csv.json',
    ]

    # Load metadata and add to DB
    for path in paths:
        metadata = MetaDataModel()
        metadata.load(path)
        handler.add_data(metadata.data)

    # Save
    handler.save(remove_duplicates=True)
    assert os.path.exists('test/test_v3_env.db')

def _test_create_db():
    """Create DB of records directory."""
    from pydtk.db import V1MetaDBHandler
    from pydtk.models import MetaDataModel

    handler = V1MetaDBHandler('test/meta_db.arrow')
    paths = [
        'test/records/016_00000000030000000240/data/camera_01_timestamps.csv.json',
        'test/records/B05_17000000010000000829/data/records.bag.json',
        'test/records/sample/data/records.bag.json'
    ]

    # Load metadata and add to DB
    for path in paths:
        metadata = MetaDataModel()
        metadata.load(path)
        handler.add_data(metadata.data)

    # Get dfs
    _ = handler.get_content_df()
    _ = handler.get_file_df()
    _ = handler.get_record_id_df()

    # Save
    handler.save()

def _test_create_db_v2():
    """Create DB of records directory."""
    from pydtk.db import V2MetaDBHandler
    from pydtk.models import MetaDataModel

    handler = V2MetaDBHandler(db_engine='sqlite',
                              db_host='test/test_v2.db',
                              base_dir_path='test')
    paths = [
        'test/records/016_00000000030000000240/data/camera_01_timestamps.csv.json',
        'test/records/B05_17000000010000000829/data/records.bag.json',
        'test/records/sample/data/records.bag.json'
    ]

    # Load metadata and add to DB
    for path in paths:
        metadata = MetaDataModel()
        metadata.load(path)
        handler.add_data(metadata.data, check_unique=True)

    # Get dfs
    _ = handler.get_content_df()
    _ = handler.get_file_df()
    _ = handler.get_record_id_df()

    # Save
    handler.save(remove_duplicates=True)

def _add_data_from_stdin(handler):
    data = json.load(sys.stdin)
    if isinstance(data, dict):
        metadata = MetaDataModel(data=data)
        handler.add_data(metadata.data)
    elif isinstance(data, list):
        for element in data:
            metadata = MetaDataModel(data=element)
            handler.add_data(metadata.data)

def load_metadata(self, path):
    """Load and return metadata."""
    metadata = None
    for ext in MetaDataModel._file_extensions:
        metadata_filepath = path + ext
        if os.path.isfile(metadata_filepath):
            metadata = MetaDataModel()
            metadata.load(metadata_filepath)
    if metadata is None:
        raise IOError('No metadata found for file: {}'.format(path))
    return metadata

def test_separated_data():
    """Run the base reader test."""
    import numpy as np
    from pydtk.models import MetaDataModel
    from pydtk.io import BaseFileReader

    metadata_path = 'test/records/sample/separated_data/records.bag.json'
    metadata = MetaDataModel()
    metadata.load(metadata_path)

    reader = BaseFileReader()
    timestamps, data, columns = reader.read(
        metadata=metadata, contents='/points_concat_downsampled')

    assert isinstance(data, np.ndarray)

def test_load_from_metadata_dict():
    """Run the base reader test."""
    import numpy as np
    from pydtk.models import MetaDataModel
    from pydtk.io import BaseFileReader

    metadata_path = 'test/records/rosbag_model_test/data/records.bag.json'
    metadata = MetaDataModel()
    metadata.load(metadata_path)
    metadata = metadata.data

    reader = BaseFileReader()
    timestamps, data, columns = reader.read(
        metadata=metadata, contents='/vehicle/analog/speed_pulse')

    assert isinstance(data, np.ndarray)

def test_std_msgs_rosbag_model():
    """Run the metadata and data loader test."""
    meta_path = 'test/records/sample/data/records.bag.json'
    path = 'test/records/sample/data/records.bag'

    from pydtk.models import MetaDataModel
    from pydtk.models.rosbag import StdMsgsRosbagModel

    # load metadata
    metadata = MetaDataModel()
    metadata.load(meta_path)

    # load
    data = StdMsgsRosbagModel(metadata=metadata)
    data.load(path, contents='/vehicle/analog/speed_pulse')

def test_geometry_msgs_accel_stamped_rosbag_model():
    """Run the metadata and data loader test."""
    meta_path = 'test/records/sample/data/records.bag.json'
    path = 'test/records/sample/data/records.bag'

    from pydtk.models import MetaDataModel
    from pydtk.models.rosbag import GeometryMsgsAccelStampedRosbagModel

    # load metadata
    metadata = MetaDataModel()
    metadata.load(meta_path)

    # load
    data = GeometryMsgsAccelStampedRosbagModel(metadata=metadata)
    data.load(path, contents='/vehicle/acceleration')

def test_sensor_msgs_nav_sat_fix_rosbag_model():
    """Run the metadata and data loader test."""
    meta_path = 'test/records/sample/data/records.bag.json'
    path = 'test/records/sample/data/records.bag'

    from pydtk.models import MetaDataModel
    from pydtk.models.rosbag import SensorMsgsNavSatFixRosbagModel

    # load metadata
    metadata = MetaDataModel()
    metadata.load(meta_path)

    # load
    data = SensorMsgsNavSatFixRosbagModel(metadata=metadata)
    data.load(path, contents='/vehicle/gnss')

def test_csv_model():
    """Run the metadata and data loader test."""
    meta_path = 'test/records/csv_model_test/data/test.csv.json'
    path = 'test/records/csv_model_test/data/test.csv'

    from pydtk.models import MetaDataModel
    from pydtk.models.csv import CameraTimestampCsvModel

    # load metadata
    metadata = MetaDataModel()
    metadata.load(meta_path)

    # load
    csv = CameraTimestampCsvModel(metadata=metadata)
    csv.load(path)
    csv.save('/tmp/test.csv')

def test_sensor_msgs_pointcloud2_rosbag_model():
    """Run the metadata and data loader test."""
    meta_path = 'test/records/sample/data/records.bag.json'
    path = 'test/records/sample/data/records.bag'

    from pydtk.models import MetaDataModel
    from pydtk.models.rosbag import SensorMsgsPointCloud2RosbagModel

    # load metadata
    metadata = MetaDataModel()
    metadata.load(meta_path)

    # load
    model = SensorMsgsPointCloud2RosbagModel(metadata=metadata)
    model.configure(fields=('x', 'y', 'z', 'intensity'))
    model.load(path, contents='/points_concat_downsampled')

def test_json_model():
    """Run the GenericJsonModel test."""
    meta_path = 'test/records/json_model_test/json_test.json.json'

    from pydtk.models import MetaDataModel
    from pydtk.models.json_model import GenericJsonModel

    # load metadata
    metadata = MetaDataModel()
    metadata.load(meta_path)

    # load
    json_model = GenericJsonModel(metadata=metadata)
    json_model.load()
    assert isinstance(json_model.data, dict)
    json_model.save('/tmp/test_json.json')

def test_annotation_model():
    """Run the AnnotationCsvModel test."""
    meta_path = 'test/records/annotation_model_test/annotation_test.csv.json'

    from pydtk.models import MetaDataModel
    from pydtk.models.csv import AnnotationCsvModel
    import numpy as np

    # load metadata
    metadata = MetaDataModel()
    metadata.load(meta_path)

    # load
    annotation_model = AnnotationCsvModel(metadata=metadata)
    annotation_model.load()
    assert isinstance(annotation_model.to_ndarray(), np.ndarray)
    annotation_model.save('/tmp/test_annotation.csv')

def add(self,
        target: str,
        content: str = None,
        database_id: str = 'default',
        base_dir: str = '/',
        **kwargs):
    """Add resources.

    Args:
        target (str): 'databases', 'records', 'files' or 'contents'
        content (str): Content to add. This must be one of the following:
            1. Database ID (in case of adding a database)
            2. Path to a JSON file (in case of adding metadata)
            3. Path to a directory containing JSON files (in case of adding metadata)
            4. Empty (in case of adding metadata)
               In the last case, PyDTK reads STDIN as JSON to add metadata.
        database_id (str): Database ID
        base_dir (str): Base directory

    """
    _assert_target(target)

    if target in ['database', 'databases']:
        if content is not None:
            database_id = content

    # Initialize Handler
    handler = DBHandler(db_class='meta',
                        database_id=database_id,
                        base_dir_path=base_dir)

    if target not in ['database', 'databases']:
        if content is None:
            _add_data_from_stdin(handler)
        else:
            if os.path.isfile(content):
                with open(content, 'r') as f:
                    data = json.load(f)
                metadata = MetaDataModel(data=data)
                handler.add_data(metadata.data)
            elif os.path.isdir(content):
                from pydtk.builder.meta_db import main as add_metadata_from_dir
                add_metadata_from_dir(target_dir=content,
                                      database_id=database_id,
                                      base_dir=base_dir)
            else:
                raise IOError('No such file or directory')

    # Save
    handler.save()
    self._handler = handler

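# A usage sketch for add() above, assuming this class is exposed as a console
# command (e.g. via python-fire); the exact entry-point name may differ.
# Each line mirrors one accepted form of 'content':
#
#   pydtk add databases my-database        # 1. database ID
#   pydtk add files metadata.json          # 2. a single metadata JSON file
#   pydtk add files records/               # 3. a directory of JSON files
#   cat metadata.json | pydtk add files    # 4. metadata read from STDIN
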
def test_metadata_model():
    """Run the metadata loader test."""
    path = 'test/records/json_model_test/json_test.json.json'

    from pydtk.models import MetaDataModel
    assert MetaDataModel.is_loadable(path)

    # load
    metadata = MetaDataModel()
    metadata.load(path)
    metadata.save('/tmp/test.json')

def _get_contents_info(file_path):
    """Get contents information from the model.

    Args:
        file_path (str): path to the file

    Returns:
        (dict): contents info

    """
    metadata = MetaDataModel(data={'path': file_path})
    model = BaseFileReader._select_model(metadata)
    contents = model.generate_contents_meta(path=file_path)
    return contents

def _get_timestamps_info(file_path):
    """Get timestamps information from the model.

    Args:
        file_path (str): path to the file

    Returns:
        (list): [start_timestamp, end_timestamp]

    """
    metadata = MetaDataModel(data={'path': file_path})
    model = BaseFileReader._select_model(metadata)
    timestamps_info = model.generate_timestamp_meta(path=file_path)
    return timestamps_info

def test_autoware_can_msgs_can_packet_rosbag_model():
    """Run the metadata and data loader test."""
    meta_path = 'test/records/can_model_test/test.bag.json'
    path = 'test/records/can_model_test/test.bag'

    from pydtk.models import MetaDataModel
    from pydtk.models.autoware import AutowareCanMsgsCANPacketRosbagModel

    # load metadata
    metadata = MetaDataModel()
    metadata.load(meta_path)

    # load
    model = AutowareCanMsgsCANPacketRosbagModel(
        metadata=metadata,
        path_to_assign_list='test/assets/can_assign_list.csv')
    model.load(path, contents='/vehicle/can_raw')
    timestamps = model.timestamps
    data = model.to_ndarray()
    columns = model.columns
    assert len(timestamps) == len(data)
    assert len(columns) == data.shape[-1]

    # load with configuration
    model = AutowareCanMsgsCANPacketRosbagModel(metadata=metadata)
    model.configure(path_to_assign_list='test/assets/can_assign_list.csv')
    model.load(path, contents='/vehicle/can_raw')

    # retrieve
    timestamps = model.timestamps
    data = model.to_ndarray()
    columns = model.columns
    assert len(timestamps) == len(data)
    assert len(columns) == data.shape[-1]

def main(target_dir,
         database_id='default',
         base_dir=None,
         output_db_engine=None,
         output_db_host=None,
         output_db_username=None,
         output_db_password=None,
         output_db_name=None,
         verbose=False):
    """Create meta_db.

    Args:
        target_dir (str): Path to database directory
        database_id (str): ID of the database (e.g. "Driving Behavior Database")
        base_dir (str): Directory path which each file-path will be based on
        output_db_engine (str): Database engine
        output_db_host (str): Host of the database
        output_db_username (str): Username for the database
        output_db_password (str): Password for the database
        output_db_name (str): Database name
        verbose (bool): Verbose mode

    """
    if verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    # Check
    if not os.path.isdir(target_dir):
        raise IOError('No such directory: {}'.format(target_dir))

    # Select DB-Handler
    db_engine = os.environ.get('PYDTK_META_DB_ENGINE', None)
    if output_db_engine is not None:
        db_engine = output_db_engine
    if db_engine is None:
        DBHandler = V4DBHandler
    elif db_engine in DB_ENGINES.keys():
        DBHandler = V4DBHandler
    else:
        DBHandler = V3DBHandler

    # Search metadata files
    t0 = time.time()
    logging.info('Searching json files...')
    json_list = _find_json(target_dir)
    t1 = time.time()
    logging.info('Found {0} files. ({1:.03f} secs)'.format(len(json_list), t1 - t0))

    # Preparation
    base_dir_path = base_dir if base_dir is not None else target_dir
    handler = DBHandler(db_class='meta',
                        db_engine=output_db_engine,
                        db_host=output_db_host,
                        db_username=output_db_username,
                        db_password=output_db_password,
                        db_name=output_db_name,
                        database_id=database_id,
                        base_dir_path=base_dir_path)

    # Append metadata to db
    logging.info('Loading metadata...')
    for path in tqdm(json_list, desc='Load metadata', leave=False):
        if not MetaDataModel.is_loadable(path):
            logging.warning('Failed to load metadata file: {}, skipped'.format(path))
            continue
        metadata = MetaDataModel()
        metadata.load(path)
        handler.add_data(metadata.data)
    t2 = time.time()
    logging.info('Finished loading metadata. ({0:.03f} secs)'.format(t2 - t1))

    # Export
    logging.info('Saving DB file...')
    handler.save(remove_duplicates=True)
    t3 = time.time()
    logging.info('Finished saving DB file. ({0:.03f} secs)'.format(t3 - t2))

    # Display
    logging.info('Done. (Total: {0:.03f} secs)'.format(t3 - t0))

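# A minimal invocation sketch for main() above; the SQLite file path is
# hypothetical. Without output_db_* overrides, the engine falls back to the
# PYDTK_META_DB_ENGINE environment variable (see the selection logic above).
def _example_build_meta_db():
    main(target_dir='test/records',              # directory scanned for *.json
         database_id='default',
         output_db_engine='sqlite',
         output_db_host='/tmp/example_meta.db',  # hypothetical DB file
         verbose=True)
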
def read(self,
         metadata=None,
         as_generator=False,
         model_kwargs=None,
         as_ndarray=True,
         **kwargs):
    """Read a file which corresponds to the given metadata.

    Args:
        metadata (MetaDataModel or dict): metadata of the data to load
        as_generator (bool): load data as a generator
        model_kwargs (dict): kwargs to pass to the selected model

    Kwargs:
        path (str): path to a file
        contents (str or dict): content to load
        start_timestamp (float): start-timestamp
        end_timestamp (float): end-timestamp

    Returns:
        (object): an object of the corresponding model

    """
    if model_kwargs is None:
        model_kwargs = {}

    if metadata is None:
        if 'path' not in kwargs.keys():
            raise ValueError('Either metadata or path must be specified')

        # Look for the corresponding metadata file
        for ext in MetaDataModel._file_extensions:
            metadata_filepath = kwargs['path'] + ext
            if os.path.isfile(metadata_filepath):
                metadata = MetaDataModel()
                metadata.load(metadata_filepath)
        if metadata is None:
            raise IOError('Could not find metadata file')
    else:
        metadata = MetaDataModel(metadata)

    # Replace 'contents' in metadata to specify which content to load
    contents = metadata.data['contents'] if 'contents' in metadata.data.keys() else None
    if 'contents' in kwargs.keys():
        if isinstance(kwargs['contents'], dict):
            contents = kwargs['contents']
        if isinstance(kwargs['contents'], str):
            contents = {k: v for k, v in contents.items() if k == kwargs['contents']}
            if len(contents) == 0:
                raise ValueError('No corresponding contents exist')

    # Replace other attributes with the given arguments
    metadata.data.update(kwargs)
    metadata.data.update({'contents': contents})

    # Select a suitable model and load data
    self.model = self._select_model(metadata)
    self.model = self.model(metadata=metadata, **model_kwargs)
    columns = self.model.columns

    if as_generator:
        def load_sample_wise():
            for sample in self.model.load(as_generator=as_generator):
                # Parse data
                timestamp = np.array(sample['timestamps'])
                data = np.array(sample['data'])
                yield timestamp, data, columns

        return load_sample_wise()
    else:
        self.model.load()

    # Parse data
    timestamps = self.model.timestamps
    if as_ndarray:
        data = self.model.to_ndarray()
    else:
        data = self.model.data

    # Apply pre-processes
    for preprocess in self.preprocesses:
        timestamps, data = preprocess.processing(timestamps, data)

    return timestamps, data, columns

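# A minimal usage sketch for BaseFileReader.read() above. The path is
# hypothetical and its '<path>.json' metadata sidecar is assumed to exist,
# since read() resolves metadata from that file when no metadata is given.
def _example_read_usage():
    from pydtk.io import BaseFileReader

    reader = BaseFileReader()

    # Load everything at once: metadata comes from '/tmp/example.csv.json'
    timestamps, data, columns = reader.read(path='/tmp/example.csv')

    # Or stream sample-wise to bound memory usage
    for timestamp, sample, columns in reader.read(path='/tmp/example.csv',
                                                  as_generator=True):
        pass  # process one (timestamps, data, columns) tuple per sample
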