def __init__(self, data=None, data_type=None, data_kwargs={}, paths={}, user_id='', **kwargs):
    """
    Set up the file paths, the data and the option attributes of the instance.

    Parameters
        data:
            - forwarded unchanged to ``self.set_data``
        data_type:
            - forwarded unchanged to ``self.set_data``
        data_kwargs ( *dict* ):
            - forwarded unchanged to ``self.set_data``
        paths ( *dict* ):
            - file parameters keyed by ``'data'``, ``'meta'`` and/or ``'log'``; combined
              with a blank entry for each of those keys via ``core.merge_dict``
        user_id ( *string* ):
            - stored as ``self.user_id``
        kwargs:
            - combined with the default options via ``core.merge_dict``; every key of
              the result becomes an attribute of the instance

    Raises
        ToyzDataError: if ``paths`` contains a key other than ``'data'``, ``'meta'``
        or ``'log'``
    """
    path_keys = ['data', 'meta', 'log']
    if any(key not in path_keys for key in paths):
        raise ToyzDataError(
            "'paths' must be a dict of file parameters with the keys "
            "'data', 'meta', 'log'")

    # Each path type starts from a shallow copy of the same blank template
    blank_path = {
        'toyz_module': '',
        'io_module': '',
        'file_type': '',
        'file_options': {}
    }
    default_paths = {}
    for key in path_keys:
        default_paths[key] = dict(blank_path)

    self.user_id = user_id
    self.paths = core.merge_dict(default_paths, paths)
    self.set_data(data, data_type, data_kwargs)

    # Defaults for the attributes that can be overridden through **kwargs
    creation_info = {'time': datetime.now(), 'software': 'unknown'}
    default_options = {
        'selected': [],
        'meta': {'creation': creation_info},
        'links': {},
        'log': [],
        'fillna': 'NaN'
    }
    merged = core.merge_dict(default_options, kwargs, True)
    for name, value in merged.items():
        setattr(self, name, value)
def __init__(self, module_info, data=None, data_type=None, data_kwargs={}, paths={},
             user_id='', **kwargs):
    """
    Set up the file paths, the data module, the data and the option attributes.

    Parameters
        module_info:
            - accepted but not referenced anywhere in this method
        data:
            - forwarded unchanged to ``self.set_data``
        data_type:
            - forwarded unchanged to ``self.set_data``
        data_kwargs ( *dict* ):
            - forwarded unchanged to ``self.set_data``
        paths ( *dict* ):
            - file parameters keyed by ``'data'``, ``'meta'`` and/or ``'log'``; combined
              with a blank entry for each of those keys via ``core.merge_dict``
        user_id ( *string* ):
            - stored as ``self.user_id``
        kwargs:
            - combined with the default options via ``core.merge_dict``; every key of
              the result becomes an attribute of the instance

    Raises
        ToyzDataError: if ``paths`` contains a key other than ``'data'``, ``'meta'``
        or ``'log'``
    """
    path_keys = ['data', 'meta', 'log']
    if any(key not in path_keys for key in paths):
        raise ToyzDataError(
            "'paths' must be a dict of file parameters with the keys "
            "'data', 'meta', 'log'")

    # Each path type starts from a shallow copy of the same blank template
    blank_path = {
        'toyz_module': '',
        'io_module': '',
        'file_type': '',
        'file_options': {}
    }
    default_paths = {}
    for key in path_keys:
        default_paths[key] = dict(blank_path)

    self.user_id = user_id
    self.paths = core.merge_dict(default_paths, paths)

    # Choose the module that defines the data type: toyz's own sources module, or a
    # module named by the user (e.g. for a custom data type such as astropy tables).
    # NOTE(review): the blank template leaves 'toyz_module' as '', which would reach
    # import_module('') below -- confirm callers always supply it.
    data_module_name = self.paths['data']['toyz_module']
    if data_module_name == 'toyz':
        self.data_module = toyz.utils.sources
    else:
        import importlib
        self.data_module = importlib.import_module(data_module_name)

    self.set_data(data, data_type, data_kwargs)

    # Defaults for the attributes that can be overridden through **kwargs
    creation_info = {'time': datetime.now(), 'software': 'unknown'}
    default_options = {
        'selected': [],
        'meta': {'creation': creation_info},
        'links': {},
        'log': [],
        'fillna': 'NaN'
    }
    merged = core.merge_dict(default_options, kwargs, True)
    for name, value in merged.items():
        setattr(self, name, value)
def save(self, save_paths={}):
    """
    Save the DataSource and, if applicable, the metadata and log.

    Parameters
        save_paths ( *dict* ):
            - optional save parameters keyed by path type (the keys of ``self.paths``).
              For a path type that already has an ``io_module`` they are merged into
              the converted load parameters; otherwise they are used as given.

    Returns
        ``self.paths['data']['file_options']`` as it stands after saving

    Raises
        ToyzDataError: if the ``'data'`` entry of ``self.paths`` has an empty
        ``io_module``
    """
    for kind, info in self.paths.items():
        target = None
        if info['io_module'] != '':
            # Load parameters already exist: turn them into save parameters
            target = dict(info)
            target['file_options'] = toyz.utils.io.convert_options(
                info['toyz_module'], info['io_module'], info['file_type'],
                info['file_options'], 'load2save')
            # Parameters given by the user are merged into the converted ones
            if kind in save_paths:
                core.merge_dict(target, save_paths[kind])
        elif kind == 'data':
            # The data source itself cannot be saved without a path
            raise ToyzDataError(
                "You must supply 'toyz_module', 'io_module', 'file_type',"
                " and 'file_options' to save")
        elif kind in save_paths:
            target = save_paths[kind]

        if target is None:
            print('No path for data_type', kind)
            continue

        print(kind, target)
        # save_data returns options usable for loading the saved file again; the
        # options used to load a file may not work after a save because the pandas
        # read and write functions are not symmetric.
        # NOTE(review): self.data is what gets written for every path type,
        # including 'meta' and 'log' -- confirm this is intended.
        self.paths[kind]['file_options'] = toyz.utils.io.save_data(
            self.data, target['toyz_module'], target['io_module'],
            target['file_type'], target['file_options'])
    print('self.path', self.paths)
    return self.paths['data']['file_options']
def save(self, save_paths={}):
    """
    Save the DataSource and, if applicable, the metadata and log.

    Parameters
        save_paths ( *dict* ):
            - optional save parameters keyed by path type (the keys of ``self.paths``).
              For a path type that already has an ``io_module`` they are merged into
              the converted load parameters; otherwise they are used as given.

    Returns
        ``self.paths['data']['file_options']`` as it stands after saving

    Raises
        ToyzDataError: if the ``'data'`` entry of ``self.paths`` has an empty
        ``io_module``
    """
    for kind, info in self.paths.items():
        target = None
        if info['io_module'] != '':
            # Load parameters already exist: turn them into save parameters
            target = dict(info)
            target['file_options'] = toyz.utils.io.convert_options(
                info['toyz_module'], info['io_module'], info['file_type'],
                info['file_options'], 'load2save')
            # Parameters given by the user are merged into the converted ones
            if kind in save_paths:
                core.merge_dict(target, save_paths[kind])
        elif kind == 'data':
            # The data source itself cannot be saved without a path
            raise ToyzDataError(
                "You must supply 'toyz_module', 'io_module', 'file_type',"
                " and 'file_options' to save")
        elif kind in save_paths:
            target = save_paths[kind]

        if target is None:
            print('No path for data_type', kind)
            continue

        print(kind, target)
        # save_data returns options usable for loading the saved file again; the
        # options used to load a file may not work after a save because the pandas
        # read and write functions are not symmetric.
        # NOTE(review): self.data is what gets written for every path type,
        # including 'meta' and 'log' -- confirm this is intended.
        self.paths[kind]['file_options'] = toyz.utils.io.save_data(
            self.data, target['toyz_module'], target['io_module'],
            target['file_type'], target['file_options'])
    print('self.path', self.paths)
    return self.paths['data']['file_options']
def astropy_write(data, file_type, **file_options):
    """
    Write an astropy table to a file

    Parameters
        data:
            - object whose ``write`` method is called (presumably an astropy table)
        file_type ( *string* ):
            - passed as the ``format`` keyword of ``data.write`` unless it is ``'ascii'``
        file_options:
            - must contain ``filename``; every other entry is passed as a keyword
              argument to ``data.write``
    """
    # Table is not referenced below; the import is kept so that a missing astropy
    # installation still fails here exactly as before.
    from astropy.table import Table
    filename = file_options['filename']
    # Work on a copy so the filename can be dropped from the keyword arguments
    write_kwargs = merge_dict({}, file_options)
    write_kwargs.pop('filename')
    # Make sure to use the proper format
    if file_type != 'ascii':
        write_kwargs['format'] = file_type
    data.write(filename, **write_kwargs)
def astropy_write(data, file_type, **file_options):
    """
    Write an astropy table to a file

    Parameters
        data:
            - object whose ``write`` method is called (presumably an astropy table)
        file_type ( *string* ):
            - passed as the ``format`` keyword of ``data.write`` unless it is ``'ascii'``
        file_options:
            - must contain ``filename``; every other entry is passed as a keyword
              argument to ``data.write``
    """
    # Table is not referenced below; the import is kept so that a missing astropy
    # installation still fails here exactly as before.
    from astropy.table import Table
    filename = file_options['filename']
    # Work on a copy so the filename can be dropped from the keyword arguments
    write_kwargs = merge_dict({}, file_options)
    write_kwargs.pop('filename')
    # Make sure to use the proper format
    if file_type != 'ascii':
        write_kwargs['format'] = file_type
    data.write(filename, **write_kwargs)
def __init__(self, module_info, data=None, data_type=None, data_kwargs={}, paths={},
             user_id='', **kwargs):
    """
    Set up the file paths, the data module, the data and the option attributes.

    Parameters
        module_info:
            - accepted but not referenced anywhere in this method
        data:
            - forwarded unchanged to ``self.set_data``
        data_type:
            - forwarded unchanged to ``self.set_data``
        data_kwargs ( *dict* ):
            - forwarded unchanged to ``self.set_data``
        paths ( *dict* ):
            - file parameters keyed by ``'data'``, ``'meta'`` and/or ``'log'``; combined
              with a blank entry for each of those keys via ``core.merge_dict``
        user_id ( *string* ):
            - stored as ``self.user_id``
        kwargs:
            - combined with the default options via ``core.merge_dict``; every key of
              the result becomes an attribute of the instance

    Raises
        ToyzDataError: if ``paths`` contains a key other than ``'data'``, ``'meta'``
        or ``'log'``
    """
    path_keys = ['data', 'meta', 'log']
    if any(key not in path_keys for key in paths):
        raise ToyzDataError(
            "'paths' must be a dict of file parameters with the keys "
            "'data', 'meta', 'log'")

    # Each path type starts from a shallow copy of the same blank template
    blank_path = {
        'toyz_module': '',
        'io_module': '',
        'file_type': '',
        'file_options': {}
    }
    default_paths = {}
    for key in path_keys:
        default_paths[key] = dict(blank_path)

    self.user_id = user_id
    self.paths = core.merge_dict(default_paths, paths)

    # Choose the module that defines the data type: toyz's own sources module, or a
    # module named by the user (e.g. for a custom data type such as astropy tables).
    # NOTE(review): the blank template leaves 'toyz_module' as '', which would reach
    # import_module('') below -- confirm callers always supply it.
    data_module_name = self.paths['data']['toyz_module']
    if data_module_name == 'toyz':
        self.data_module = toyz.utils.sources
    else:
        import importlib
        self.data_module = importlib.import_module(data_module_name)

    self.set_data(data, data_type, data_kwargs)

    # Defaults for the attributes that can be overridden through **kwargs
    creation_info = {'time': datetime.now(), 'software': 'unknown'}
    default_options = {
        'selected': [],
        'meta': {'creation': creation_info},
        'links': {},
        'log': [],
        'fillna': 'NaN'
    }
    merged = core.merge_dict(default_options, kwargs, True)
    for name, value in merged.items():
        setattr(self, name, value)
def astropy_read(file_type, **file_options):
    """
    Read into an astropy table

    Parameters
        file_type ( *string* ):
            - passed as the ``format`` keyword of ``Table.read`` unless it is ``'ascii'``
        file_options:
            - must contain ``filename``; every other entry is passed as a keyword
              argument to ``Table.read``

    Returns
        the object returned by ``Table.read``
    """
    from astropy.table import Table
    filename = file_options['filename']
    # Work on a copy so the filename can be dropped from the keyword arguments
    read_kwargs = merge_dict({}, file_options)
    read_kwargs.pop('filename')
    # Make sure to use the proper format
    if file_type != 'ascii':
        read_kwargs['format'] = file_type
    return Table.read(filename, **read_kwargs)
def astropy_read(file_type, **file_options):
    """
    Read into an astropy table

    Parameters
        file_type ( *string* ):
            - passed as the ``format`` keyword of ``Table.read`` unless it is ``'ascii'``
        file_options:
            - must contain ``filename``; every other entry is passed as a keyword
              argument to ``Table.read``

    Returns
        the object returned by ``Table.read``
    """
    from astropy.table import Table
    filename = file_options['filename']
    # Work on a copy so the filename can be dropped from the keyword arguments
    read_kwargs = merge_dict({}, file_options)
    read_kwargs.pop('filename')
    # Make sure to use the proper format
    if file_type != 'ascii':
        read_kwargs['format'] = file_type
    return Table.read(filename, **read_kwargs)
def save_data(data, toyz_module, io_module, file_type, file_options):
    """
    Save data to a file

    Parameters
        data ( *object* ):
            - data object to save
        toyz_module ( *string* ):
            - name of toyz module to use for i/o
        io_module ( *string* ):
            - name of python module to use for i/o
        file_type ( *string* ):
            - type of file to open (for example 'hdf', 'csv', 'npy', etc.)
            - *Note*: the ``file_type`` must be supported by the given ``io_module``
        file_options ( *dict* ):
            - dictionary of options as specified in the ``io_module``s documentation

    Returns
        the result of ``convert_options(..., 'save2load')`` applied to the options
        that were actually used to save

    Raises
        ToyzIoError: if ``toyz_module`` is ``'toyz'`` and ``io_module`` is not one of
        ``'python'``, ``'numpy'`` or ``'pandas'``; if an affiliated ``toyz_module``
        cannot be imported; or if a ``KeyError`` occurs while looking up or running
        its save function
    """
    # Ignore parameters for other functions like 'load'
    module = get_io_module(toyz_module, io_module)
    params = module[file_type]['save']
    # Collect the allowed names in a set: adding two ``dict.keys()`` results only
    # works where ``keys()`` returns a list, and 'optional' may be absent
    save_options = set(params['params'])
    if 'optional' in params:
        save_options.update(params['optional'])
    file_options = {k: v for k, v in file_options.items() if k in save_options}

    if toyz_module == 'toyz':
        if io_module == 'python':
            mode = file_options['mode']
            if '+' not in mode and 'w' not in mode:
                file_options['mode'] = mode + '+'
            # 'columns' is written to the file first; it is not an argument of open()
            columns = file_options.get('columns')
            open_options = {k: v for k, v in file_options.items() if k != 'columns'}
            # The context manager closes the file even if a write fails
            with open(**open_options) as f:
                if columns is not None:
                    f.write(columns)
                f.write(data)
        elif io_module == 'numpy':
            import numpy as np
            np.save(file_options['file'], data)
        elif io_module == 'pandas':
            if file_type == 'csv':
                if 'columns' in file_options:
                    file_options['columns'] = load_list(file_options['columns'], False)
                print('saving', file_options)
                print('data', data)
                # Leave the index out of the file unless the caller asked for it
                if 'index' not in file_options:
                    file_options['index'] = False
                data.to_csv(**file_options)
            elif file_type == 'hdf':
                data.to_hdf(**file_options)
            elif file_type == 'sql':
                data.to_sql(**file_options)
        else:
            raise ToyzIoError(
                "'" + io_module + "' is not currently supported by Toyz. \n"
                "You may need to import an affiliated module")
    else:
        import importlib
        try:
            module = importlib.import_module(toyz_module)
        except ImportError:
            raise ToyzIoError("Could not import module '" + toyz_module + "'")
        # NOTE(review): a KeyError raised inside the save function itself is also
        # reported as "Could not find ..."; kept so callers see the same exception type
        try:
            save_fn = module.config.io_modules[io_module][file_type]['save_fn']
            module.config.save_functions[save_fn](data, file_type, **file_options)
        except KeyError:
            raise ToyzIoError(
                "Could not find " + io_module + " in " + toyz_module + " save_functions")
    return convert_options(toyz_module, io_module, file_type, file_options, 'save2load')
def load_data(toyz_module, io_module, file_type, file_options):
    """
    Loads a data file using a specified python module and a set of options.

    Parameters
        toyz_module ( *string* ):
            - name of toyz module to use for i/o
        io_module ( *string* ):
            - name of python module to use for i/o
        file_type ( *string* ):
            - type of file to open (for example 'hdf', 'csv', 'npy', etc.)
            - *Note*: the ``file_type`` must be supported by the given ``io_module``
        file_options ( *dict* ):
            - dictionary of options as specified in the ``io_module``s documentation

    Returns
        the loaded data: a list of split lines for the ``'python'`` module, the result
        of ``np.load`` for ``'numpy'``, the result of the pandas reader for
        ``'pandas'``, or whatever the affiliated module's load function returns

    Raises
        ToyzIoError: if the file type or ``io_module`` is not supported; if an
        affiliated ``toyz_module`` cannot be imported; or if a ``KeyError`` occurs
        while looking up or running its load function
    """
    # Make a copy of the file_options to use
    file_options = core.merge_dict({}, file_options)

    # Ignore parameters for other functions like 'save'
    module = get_io_module(toyz_module, io_module)
    params = module[file_type]['load']
    # Collect the allowed names in a set: adding two ``dict.keys()`` results only
    # works where ``keys()`` returns a list
    load_options = set(params['params'])
    if 'optional' in params:
        load_options.update(params['optional'])
    file_options = {k: v for k, v in file_options.items() if k in load_options}
    print('keys', module[file_type]['load'].keys())
    print('file_options', file_options)

    if toyz_module == 'toyz':
        print("in toyz")
        if io_module == 'python':
            print('in python')
            sep = file_options.pop('sep')
            # 'use_cols' is not an argument of open() and this reader does not use it
            file_options.pop('use_cols', None)
            data = []
            # The context manager closes the file on every exit path
            with open(**file_options) as f:
                if file_type != 'csv':
                    raise ToyzIoError(
                        "Invalid file type '{0}' for python open file".format(file_type))
                for line in f:
                    no_cr = line.split('\n')[0]
                    data.append(no_cr.split(sep))
        elif io_module == 'numpy':
            import numpy as np
            data = np.load(**file_options)
        elif io_module == 'pandas':
            import pandas as pd
            if file_type == 'csv':
                if 'dtype' in file_options:
                    file_options['dtype'] = load_dict(file_options['dtype'], True)
                if 'na_values' in file_options:
                    file_options['na_values'] = load_unknown(
                        file_options['na_values'], False)
                # Options converted with load_list, paired with its second argument
                list_options = (
                    ('header', True),
                    ('skiprows', True),
                    ('names', False),
                    ('true_values', False),
                    ('false_values', False),
                    ('usecols', False),
                )
                for key, flag in list_options:
                    if key in file_options:
                        file_options[key] = load_list(file_options[key], flag)
                if 'date_parser' in file_options:
                    import importlib
                    # Split on the last dot so getattr receives one attribute name
                    # (a string) and the rest is the importable module path
                    parser_module, parser_name = file_options['date_parser'].rsplit('.', 1)
                    file_options['date_parser'] = getattr(
                        importlib.import_module(parser_module), parser_name)
                data = pd.read_csv(**file_options)
            elif file_type == 'hdf':
                if 'columns' in file_options:
                    file_options['columns'] = load_list(file_options['columns'], False)
                data = pd.read_hdf(**file_options)
            elif file_type == 'sql':
                from sqlalchemy import create_engine
                print('file options', file_options)
                # The connection string and the query are positional arguments of
                # read_sql, so they are removed from the keyword options
                engine = create_engine(file_options.pop('connection'))
                sql = file_options.pop('sql')
                data = pd.read_sql(sql, engine, **file_options)
            else:
                raise ToyzIoError("File type is not yet supported")
        else:
            # Without this branch ``data`` would be unbound at the return below
            raise ToyzIoError(
                "'" + io_module + "' is not currently supported by Toyz. \n"
                "You may need to import an affiliated module")
    else:
        import importlib
        try:
            module = importlib.import_module(toyz_module)
        except ImportError:
            raise ToyzIoError("Could not import module '" + toyz_module + "'")
        # NOTE(review): a KeyError raised inside the load function itself is also
        # reported as "Could not find ..."; kept so callers see the same exception type
        try:
            load_fn = module.config.io_modules[io_module][file_type]['load_fn']
            data = module.config.load_functions[load_fn](file_type, **file_options)
        except KeyError:
            raise ToyzIoError(
                "Could not find " + io_module + " in " + toyz_module + " load_functions")
    return data
def save_data(data, toyz_module, io_module, file_type, file_options):
    """
    Save data to a file

    Parameters
        data ( *object* ):
            - data object to save
        toyz_module ( *string* ):
            - name of toyz module to use for i/o
        io_module ( *string* ):
            - name of python module to use for i/o
        file_type ( *string* ):
            - type of file to open (for example 'hdf', 'csv', 'npy', etc.)
            - *Note*: the ``file_type`` must be supported by the given ``io_module``
        file_options ( *dict* ):
            - dictionary of options as specified in the ``io_module``s documentation

    Returns
        the result of ``convert_options(..., 'save2load')`` applied to the options
        that were actually used to save

    Raises
        ToyzIoError: if ``toyz_module`` is ``'toyz'`` and ``io_module`` is not one of
        ``'python'``, ``'numpy'`` or ``'pandas'``; if an affiliated ``toyz_module``
        cannot be imported; or if a ``KeyError`` occurs while looking up or running
        its save function
    """
    # Ignore parameters for other functions like 'load'
    module = get_io_module(toyz_module, io_module)
    params = module[file_type]['save']
    # Collect the allowed names in a set: adding two ``dict.keys()`` results only
    # works where ``keys()`` returns a list, and 'optional' may be absent
    save_options = set(params['params'])
    if 'optional' in params:
        save_options.update(params['optional'])
    file_options = {k: v for k, v in file_options.items() if k in save_options}

    if toyz_module == 'toyz':
        if io_module == 'python':
            mode = file_options['mode']
            if '+' not in mode and 'w' not in mode:
                file_options['mode'] = mode + '+'
            # 'columns' is written to the file first; it is not an argument of open()
            columns = file_options.get('columns')
            open_options = {k: v for k, v in file_options.items() if k != 'columns'}
            # The context manager closes the file even if a write fails
            with open(**open_options) as f:
                if columns is not None:
                    f.write(columns)
                f.write(data)
        elif io_module == 'numpy':
            import numpy as np
            np.save(file_options['file'], data)
        elif io_module == 'pandas':
            if file_type == 'csv':
                if 'columns' in file_options:
                    file_options['columns'] = load_list(file_options['columns'], False)
                print('saving', file_options)
                print('data', data)
                # Leave the index out of the file unless the caller asked for it
                if 'index' not in file_options:
                    file_options['index'] = False
                data.to_csv(**file_options)
            elif file_type == 'hdf':
                data.to_hdf(**file_options)
            elif file_type == 'sql':
                data.to_sql(**file_options)
        else:
            raise ToyzIoError(
                "'" + io_module + "' is not currently supported by Toyz. \n"
                "You may need to import an affiliated module")
    else:
        import importlib
        try:
            module = importlib.import_module(toyz_module)
        except ImportError:
            raise ToyzIoError("Could not import module '" + toyz_module + "'")
        # NOTE(review): a KeyError raised inside the save function itself is also
        # reported as "Could not find ..."; kept so callers see the same exception type
        try:
            save_fn = module.config.io_modules[io_module][file_type]['save_fn']
            module.config.save_functions[save_fn](data, file_type, **file_options)
        except KeyError:
            raise ToyzIoError(
                "Could not find " + io_module + " in " + toyz_module + " save_functions")
    return convert_options(toyz_module, io_module, file_type, file_options, 'save2load')
def load_data(toyz_module, io_module, file_type, file_options):
    """
    Loads a data file using a specified python module and a set of options.

    Parameters
        toyz_module ( *string* ):
            - name of toyz module to use for i/o
        io_module ( *string* ):
            - name of python module to use for i/o
        file_type ( *string* ):
            - type of file to open (for example 'hdf', 'csv', 'npy', etc.)
            - *Note*: the ``file_type`` must be supported by the given ``io_module``
        file_options ( *dict* ):
            - dictionary of options as specified in the ``io_module``s documentation

    Returns
        the loaded data: a list of split lines for the ``'python'`` module, the result
        of ``np.load`` for ``'numpy'``, the result of the pandas reader for
        ``'pandas'``, or whatever the affiliated module's load function returns

    Raises
        ToyzIoError: if the file type or ``io_module`` is not supported; if an
        affiliated ``toyz_module`` cannot be imported; or if a ``KeyError`` occurs
        while looking up or running its load function
    """
    # Make a copy of the file_options to use
    file_options = core.merge_dict({}, file_options)

    # Ignore parameters for other functions like 'save'
    module = get_io_module(toyz_module, io_module)
    params = module[file_type]['load']
    # Collect the allowed names in a set: adding two ``dict.keys()`` results only
    # works where ``keys()`` returns a list
    load_options = set(params['params'])
    if 'optional' in params:
        load_options.update(params['optional'])
    file_options = {k: v for k, v in file_options.items() if k in load_options}
    print('keys', module[file_type]['load'].keys())
    print('file_options', file_options)

    if toyz_module == 'toyz':
        print("in toyz")
        if io_module == 'python':
            print('in python')
            sep = file_options.pop('sep')
            # 'use_cols' is not an argument of open() and this reader does not use it
            file_options.pop('use_cols', None)
            data = []
            # The context manager closes the file on every exit path
            with open(**file_options) as f:
                if file_type != 'csv':
                    raise ToyzIoError(
                        "Invalid file type '{0}' for python open file".format(file_type))
                for line in f:
                    no_cr = line.split('\n')[0]
                    data.append(no_cr.split(sep))
        elif io_module == 'numpy':
            import numpy as np
            data = np.load(**file_options)
        elif io_module == 'pandas':
            import pandas as pd
            if file_type == 'csv':
                if 'dtype' in file_options:
                    file_options['dtype'] = load_dict(file_options['dtype'], True)
                if 'na_values' in file_options:
                    file_options['na_values'] = load_unknown(
                        file_options['na_values'], False)
                # Options converted with load_list, paired with its second argument
                list_options = (
                    ('header', True),
                    ('skiprows', True),
                    ('names', False),
                    ('true_values', False),
                    ('false_values', False),
                    ('usecols', False),
                )
                for key, flag in list_options:
                    if key in file_options:
                        file_options[key] = load_list(file_options[key], flag)
                if 'date_parser' in file_options:
                    import importlib
                    # Split on the last dot so getattr receives one attribute name
                    # (a string) and the rest is the importable module path
                    parser_module, parser_name = file_options['date_parser'].rsplit('.', 1)
                    file_options['date_parser'] = getattr(
                        importlib.import_module(parser_module), parser_name)
                data = pd.read_csv(**file_options)
            elif file_type == 'hdf':
                if 'columns' in file_options:
                    file_options['columns'] = load_list(file_options['columns'], False)
                data = pd.read_hdf(**file_options)
            elif file_type == 'sql':
                from sqlalchemy import create_engine
                print('file options', file_options)
                # The connection string and the query are positional arguments of
                # read_sql, so they are removed from the keyword options
                engine = create_engine(file_options.pop('connection'))
                sql = file_options.pop('sql')
                data = pd.read_sql(sql, engine, **file_options)
            else:
                raise ToyzIoError("File type is not yet supported")
        else:
            # Without this branch ``data`` would be unbound at the return below
            raise ToyzIoError(
                "'" + io_module + "' is not currently supported by Toyz. \n"
                "You may need to import an affiliated module")
    else:
        import importlib
        try:
            module = importlib.import_module(toyz_module)
        except ImportError:
            raise ToyzIoError("Could not import module '" + toyz_module + "'")
        # NOTE(review): a KeyError raised inside the load function itself is also
        # reported as "Could not find ..."; kept so callers see the same exception type
        try:
            load_fn = module.config.io_modules[io_module][file_type]['load_fn']
            data = module.config.load_functions[load_fn](file_type, **file_options)
        except KeyError:
            raise ToyzIoError(
                "Could not find " + io_module + " in " + toyz_module + " load_functions")
    return data