Ejemplo n.º 1
0
 def __init__(self, data=None, data_type=None, data_kwargs=None, paths=None,
     user_id='', **kwargs):
     """
     Create a data source: set up its file paths, store its data and apply options.

     Parameters
         data ( *object*, optional ):
             - passed to ``self.set_data``
         data_type ( *object*, optional ):
             - passed to ``self.set_data``
         data_kwargs ( *dict*, optional ):
             - passed to ``self.set_data``; ``None`` is treated as an empty dict
         paths ( *dict*, optional ):
             - file parameters keyed by ``'data'``, ``'meta'`` and/or ``'log'``, merged
               over defaults of empty strings and an empty ``file_options`` dict
         user_id ( *string* ):
             - stored as ``self.user_id``
         kwargs:
             - merged over the default options (``selected``, ``meta``, ``links``,
               ``log``, ``fillna``); every resulting key is set as an attribute

     Raises
         ToyzDataError: if ``paths`` has a key other than 'data', 'meta' or 'log'
     """
     # Use fresh containers instead of mutable default arguments shared between calls
     if data_kwargs is None:
         data_kwargs = {}
     if paths is None:
         paths = {}
     path_types = ['data', 'meta', 'log']
     if not all(k in path_types for k in paths):
         raise ToyzDataError(
             "'paths' must be a dict of file parameters with the keys "
             "'data', 'meta', 'log'")
     # Build the defaults separately for every path type so that each one owns its
     # own nested 'file_options' dict (a shallow copy of a shared template would
     # alias that dict between 'data', 'meta' and 'log')
     default_paths = {
         f: {
             'toyz_module': '',
             'io_module': '',
             'file_type': '',
             'file_options': {}
         }
         for f in path_types
     }
     self.user_id = user_id
     self.paths = core.merge_dict(default_paths, paths)
     self.set_data(data, data_type, data_kwargs)
     default_options = {
         'selected': [],
         'meta': {
             'creation': {
                 'time': datetime.now(),
                 'software': 'unknown'
             }
         },
         'links': {},
         'log': [],
         'fillna': 'NaN'
     }
     options = core.merge_dict(default_options, kwargs, True)
     for k, v in options.items():
         setattr(self, k, v)
Ejemplo n.º 2
0
 def __init__(self,
              module_info,
              data=None,
              data_type=None,
              data_kwargs=None,
              paths=None,
              user_id='',
              **kwargs):
     """
     Create a data source: set up its file paths, pick the module that defines
     the data type, store its data and apply options.

     Parameters
         module_info ( *object* ):
             - not used by this method
         data ( *object*, optional ):
             - passed to ``self.set_data``
         data_type ( *object*, optional ):
             - passed to ``self.set_data``
         data_kwargs ( *dict*, optional ):
             - passed to ``self.set_data``; ``None`` is treated as an empty dict
         paths ( *dict*, optional ):
             - file parameters keyed by ``'data'``, ``'meta'`` and/or ``'log'``, merged
               over defaults of empty strings and an empty ``file_options`` dict
         user_id ( *string* ):
             - stored as ``self.user_id``
         kwargs:
             - merged over the default options (``selected``, ``meta``, ``links``,
               ``log``, ``fillna``); every resulting key is set as an attribute

     Raises
         ToyzDataError: if ``paths`` has a key other than 'data', 'meta' or 'log'
     """
     # Use fresh containers instead of mutable default arguments shared between calls
     if data_kwargs is None:
         data_kwargs = {}
     if paths is None:
         paths = {}
     path_types = ['data', 'meta', 'log']
     if not all(k in path_types for k in paths):
         raise ToyzDataError(
             "'paths' must be a dict of file parameters with the keys "
             "'data', 'meta', 'log'")
     # Build the defaults separately for every path type so that each one owns its
     # own nested 'file_options' dict (a shallow copy of a shared template would
     # alias that dict between 'data', 'meta' and 'log')
     default_paths = {
         f: {
             'toyz_module': '',
             'io_module': '',
             'file_type': '',
             'file_options': {}
         }
         for f in path_types
     }
     self.user_id = user_id
     self.paths = core.merge_dict(default_paths, paths)
     # Set the module used to specify the data type (usually toyz unless the user has
     # created a custom data type, like astropy tables)
     if self.paths['data']['toyz_module'] == 'toyz':
         self.data_module = toyz.utils.sources
     else:
         import importlib
         self.data_module = importlib.import_module(
             self.paths['data']['toyz_module'])
     self.set_data(data, data_type, data_kwargs)
     default_options = {
         'selected': [],
         'meta': {
             'creation': {
                 'time': datetime.now(),
                 'software': 'unknown'
             }
         },
         'links': {},
         'log': [],
         'fillna': 'NaN'
     }
     options = core.merge_dict(default_options, kwargs, True)
     for k, v in options.items():
         setattr(self, k, v)
Ejemplo n.º 3
0
 def save(self, save_paths=None):
     """
     Save the DataSource and, if applicable, the metadata and log.

     Parameters
         save_paths ( *dict*, optional ):
             - save parameters keyed by the entries of ``self.paths``
               ('data', 'meta', 'log'). Each value holds 'toyz_module', 'io_module',
               'file_type' and 'file_options'. When load parameters already exist
               for an entry these are merged over the converted load parameters.

     Returns
         - ``self.paths['data']['file_options']`` after saving

     Raises
         ToyzDataError: if there are neither load parameters nor ``save_paths``
         parameters for the 'data' entry
     """
     if save_paths is None:
         save_paths = {}
     # Save the data source
     for data_type, file_info in self.paths.items():
         # if the user already has load parameters, convert those to save parameters
         if file_info['io_module'] != '':
             save_path = dict(file_info)
             save_path['file_options'] = toyz.utils.io.convert_options(
                 file_info['toyz_module'],
                 file_info['io_module'],
                 file_info['file_type'],
                 file_info['file_options'],
                 'load2save'
             )
             # If the user specified save parameters, update save_path with those
             # (use the returned dict, as every other merge_dict call site does)
             if data_type in save_paths:
                 save_path = core.merge_dict(save_path, save_paths[data_type])
         # Without load parameters, fall back to the user supplied save parameters.
         # This is checked before the 'data' error below so that a data path given
         # only through save_paths is honoured instead of rejected.
         elif data_type in save_paths:
             save_path = save_paths[data_type]
         # The user must specify a data path to save the data source to
         elif data_type == 'data':
             raise ToyzDataError(
                 "You must supply 'toyz_module', 'io_module', 'file_type',"
                 " and 'file_options' to save")
         else:
             save_path = None
         # If a path exists for the data type, save the meta/data/log file
         if save_path is not None:
             print(data_type, save_path)
             # Save data and convert the save paths to paths that can be used to load the file
             # (note: since pandas read and write functions are not symmetric, the same settings
             # used to load the file might not work when loading it again after a save)
             # NOTE(review): self.data is written for every data_type, including
             # 'meta' and 'log' -- confirm this is intended
             self.paths[data_type]['file_options'] = toyz.utils.io.save_data(
                 self.data,
                 save_path['toyz_module'],
                 save_path['io_module'],
                 save_path['file_type'],
                 save_path['file_options']
             )
         else:
             print('No path for data_type', data_type)

     print('self.path', self.paths)
     return self.paths['data']['file_options']
Ejemplo n.º 4
0
    def save(self, save_paths=None):
        """
        Save the DataSource and, if applicable, the metadata and log.

        Parameters
            save_paths ( *dict*, optional ):
                - save parameters keyed by the entries of ``self.paths``
                  ('data', 'meta', 'log'). Each value holds 'toyz_module',
                  'io_module', 'file_type' and 'file_options'. When load parameters
                  already exist for an entry these are merged over the converted
                  load parameters.

        Returns
            - ``self.paths['data']['file_options']`` after saving

        Raises
            ToyzDataError: if there are neither load parameters nor ``save_paths``
            parameters for the 'data' entry
        """
        if save_paths is None:
            save_paths = {}
        # Save the data source
        for data_type, file_info in self.paths.items():
            # if the user already has load parameters, convert those to save parameters
            if file_info['io_module'] != '':
                save_path = dict(file_info)
                save_path['file_options'] = toyz.utils.io.convert_options(
                    file_info['toyz_module'], file_info['io_module'],
                    file_info['file_type'], file_info['file_options'],
                    'load2save')
                # If the user specified save parameters, update save_path with those
                # (use the returned dict, as every other merge_dict call site does)
                if data_type in save_paths:
                    save_path = core.merge_dict(save_path,
                                                save_paths[data_type])
            # Without load parameters, fall back to the user supplied save
            # parameters. This is checked before the 'data' error below so that a
            # data path given only through save_paths is honoured, not rejected.
            elif data_type in save_paths:
                save_path = save_paths[data_type]
            # The user must specify a data path to save the data source to
            elif data_type == 'data':
                raise ToyzDataError(
                    "You must supply 'toyz_module', 'io_module', 'file_type',"
                    " and 'file_options' to save")
            else:
                save_path = None
            # If a path exists for the data type, save the meta/data/log file
            if save_path is not None:
                print(data_type, save_path)
                # Save data and convert the save paths to paths that can be used to load the file
                # (note: since pandas read and write functions are not symmetric, the same settings
                # used to load the file might not work when loading it again after a save)
                # NOTE(review): self.data is written for every data_type, including
                # 'meta' and 'log' -- confirm this is intended
                self.paths[data_type][
                    'file_options'] = toyz.utils.io.save_data(
                        self.data, save_path['toyz_module'],
                        save_path['io_module'], save_path['file_type'],
                        save_path['file_options'])
            else:
                print('No path for data_type', data_type)

        print('self.path', self.paths)
        return self.paths['data']['file_options']
Ejemplo n.º 5
0
def astropy_write(data, file_type, **file_options):
    """
    Write an astropy table to a file

    Parameters
        data ( *object* ):
            - object whose ``write(filename, **options)`` method is called
        file_type ( *string* ):
            - passed to ``data.write`` as ``format`` unless it is ``'ascii'``
        file_options:
            - must contain ``filename``; all remaining entries are passed to
              ``data.write`` as keyword arguments

    Raises
        KeyError: if ``filename`` is not given
    """
    # ``file_options`` is a new dict built from the keyword arguments of this call,
    # so it can be modified directly without copying it first
    filename = file_options.pop('filename')
    # Make sure to use the proper format
    if file_type != 'ascii':
        file_options['format'] = file_type
    data.write(filename, **file_options)
Ejemplo n.º 6
0
def astropy_write(data, file_type, **file_options):
    """
    Write an astropy table to a file

    Parameters
        data ( *object* ):
            - object whose ``write(filename, **options)`` method is called
        file_type ( *string* ):
            - passed to ``data.write`` as ``format`` unless it is ``'ascii'``
        file_options:
            - must contain ``filename``; all remaining entries are passed to
              ``data.write`` as keyword arguments

    Raises
        KeyError: if ``filename`` is not given
    """
    # ``file_options`` is a new dict built from the keyword arguments of this call,
    # so it can be modified directly without copying it first
    filename = file_options.pop('filename')
    # Make sure to use the proper format
    if file_type != 'ascii':
        file_options['format'] = file_type
    data.write(filename, **file_options)
Ejemplo n.º 7
0
 def __init__(self, module_info, data=None, data_type=None, data_kwargs=None, paths=None,
     user_id='', **kwargs):
     """
     Create a data source: set up its file paths, pick the module that defines
     the data type, store its data and apply options.

     Parameters
         module_info ( *object* ):
             - not used by this method
         data ( *object*, optional ):
             - passed to ``self.set_data``
         data_type ( *object*, optional ):
             - passed to ``self.set_data``
         data_kwargs ( *dict*, optional ):
             - passed to ``self.set_data``; ``None`` is treated as an empty dict
         paths ( *dict*, optional ):
             - file parameters keyed by ``'data'``, ``'meta'`` and/or ``'log'``, merged
               over defaults of empty strings and an empty ``file_options`` dict
         user_id ( *string* ):
             - stored as ``self.user_id``
         kwargs:
             - merged over the default options (``selected``, ``meta``, ``links``,
               ``log``, ``fillna``); every resulting key is set as an attribute

     Raises
         ToyzDataError: if ``paths`` has a key other than 'data', 'meta' or 'log'
     """
     # Use fresh containers instead of mutable default arguments shared between calls
     if data_kwargs is None:
         data_kwargs = {}
     if paths is None:
         paths = {}
     path_types = ['data', 'meta', 'log']
     if not all(k in path_types for k in paths):
         raise ToyzDataError(
             "'paths' must be a dict of file parameters with the keys "
             "'data', 'meta', 'log'")
     # Build the defaults separately for every path type so that each one owns its
     # own nested 'file_options' dict (a shallow copy of a shared template would
     # alias that dict between 'data', 'meta' and 'log')
     default_paths = {
         f: {
             'toyz_module': '',
             'io_module': '',
             'file_type': '',
             'file_options': {}
         }
         for f in path_types
     }
     self.user_id = user_id
     self.paths = core.merge_dict(default_paths, paths)
     # Set the module used to specify the data type (usually toyz unless the user has
     # created a custom data type, like astropy tables)
     if self.paths['data']['toyz_module'] == 'toyz':
         self.data_module = toyz.utils.sources
     else:
         import importlib
         self.data_module = importlib.import_module(self.paths['data']['toyz_module'])
     self.set_data(data, data_type, data_kwargs)
     default_options = {
         'selected': [],
         'meta': {
             'creation': {
                 'time': datetime.now(),
                 'software': 'unknown'
             }
         },
         'links': {},
         'log': [],
         'fillna': 'NaN'
     }
     options = core.merge_dict(default_options, kwargs, True)
     for k, v in options.items():
         setattr(self, k, v)
Ejemplo n.º 8
0
def astropy_read(file_type, **file_options):
    """
    Read into an astropy table

    Parameters
        file_type ( *string* ):
            - passed to ``Table.read`` as ``format`` unless it is ``'ascii'``
        file_options:
            - must contain ``filename``; all remaining entries are passed to
              ``Table.read`` as keyword arguments

    Returns
        - the result of ``Table.read``

    Raises
        KeyError: if ``filename`` is not given
    """
    from astropy.table import Table
    # ``file_options`` is a new dict built from the keyword arguments of this call,
    # so it can be modified directly without copying it first
    filename = file_options.pop('filename')
    # Make sure to use the proper format
    if file_type != 'ascii':
        file_options['format'] = file_type
    return Table.read(filename, **file_options)
Ejemplo n.º 9
0
def astropy_read(file_type, **file_options):
    """
    Read into an astropy table

    Parameters
        file_type ( *string* ):
            - passed to ``Table.read`` as ``format`` unless it is ``'ascii'``
        file_options:
            - must contain ``filename``; all remaining entries are passed to
              ``Table.read`` as keyword arguments

    Returns
        - the result of ``Table.read``

    Raises
        KeyError: if ``filename`` is not given
    """
    from astropy.table import Table
    # ``file_options`` is a new dict built from the keyword arguments of this call,
    # so it can be modified directly without copying it first
    filename = file_options.pop('filename')
    # Make sure to use the proper format
    if file_type != 'ascii':
        file_options['format'] = file_type
    return Table.read(filename, **file_options)
Ejemplo n.º 10
0
def save_data(data, toyz_module, io_module, file_type, file_options):
    """
    Save data to a file
    
    Parameters
        data ( *object* ):
            - data object to save
        toyz_module ( *string* ):
            - name of toyz module to use for i/o
        io_module ( *string* ):
            - name of python module to use for i/o
        file_type ( *string* ): 
            - type of file to open (for example 'hdf', 'csv', 'npy', etc.)
            - *Note*: the ``file_type`` must be supported by the given ``io_module``
        file_options ( *dict* ): 
            - dictionary of options as specified in the ``io_module``s documentation

    Returns
        - the result of ``convert_options(..., 'save2load')`` applied to the
          options that were used to save

    Raises
        ToyzIoError: if ``io_module`` is not handled for the 'toyz' module, if
        ``toyz_module`` cannot be imported, or if a ``KeyError`` occurs while
        looking up or running the save function of another module
    """
    # Ignore parameters for other functions like 'load'
    module = get_io_module(toyz_module, io_module)
    params = module[file_type]['save']
    # Build a list of the allowed names: dict key views cannot be added together
    # on Python 3, and 'optional' may be missing (load_data allows for that too)
    allowed_options = list(params['params']) + list(params.get('optional', {}))
    file_options = {
        k: v
        for k, v in file_options.items() if k in allowed_options
    }
    if toyz_module == 'toyz':
        if io_module == 'python':
            if '+' not in file_options['mode'] and 'w' not in file_options[
                    'mode']:
                file_options['mode'] = file_options['mode'] + '+'
            # 'columns' is written to the file, it is not an argument of open()
            columns = file_options.get('columns') if 'columns' in file_options else None
            open_options = {
                k: v
                for k, v in file_options.items() if k != 'columns'
            }
            # The context manager closes the file even if a write fails
            with open(**open_options) as f:
                if columns is not None:
                    f.write(columns)
                f.write(data)
        elif io_module == 'numpy':
            import numpy as np
            np.save(file_options['file'], data)
        elif io_module == 'pandas':
            if file_type == 'csv':
                if 'columns' in file_options:
                    file_options['columns'] = load_list(
                        file_options['columns'], False)
                print('saving', file_options)
                print('data', data)
                if 'index' not in file_options:
                    file_options['index'] = False
                data.to_csv(**file_options)
            elif file_type == 'hdf':
                data.to_hdf(**file_options)
            elif file_type == 'sql':
                data.to_sql(**file_options)
        else:
            raise ToyzIoError("'" + io_module +
                              "' is not currently supported by Toyz. "
                              "You may need to import an affiliated module")
    else:
        import importlib
        try:
            module = importlib.import_module(toyz_module)
        except ImportError:
            raise ToyzIoError("Could not import module '" + toyz_module + "'")
        try:
            save_fn = module.config.io_modules[io_module][file_type]['save_fn']
            module.config.save_functions[save_fn](data, file_type,
                                                  **file_options)
        except KeyError:
            raise ToyzIoError("Could not find " + io_module + " in " +
                              toyz_module + " save_functions")
    return convert_options(toyz_module, io_module, file_type, file_options,
                           'save2load')
Ejemplo n.º 11
0
def load_data(toyz_module, io_module, file_type, file_options):
    """
    Loads a data file using a specified python module and a set of options.
    
    Parameters
        toyz_module ( *string* ):
            - name of toyz module to use for i/o
        io_module ( *string* ):
            - name of python module to use for i/o
        file_type ( *string* ): 
            - type of file to open (for example 'hdf', 'csv', 'npy', etc.)
            - *Note*: the ``file_type`` must be supported by the given ``io_module``
        file_options ( *dict* ): 
            - dictionary of options as specified in the ``io_module``s documentation

    Returns
        - the loaded data: a list of rows (lists of strings) for the 'python'
          module, otherwise whatever the numpy, pandas or custom load function returns

    Raises
        ToyzIoError: if the file type or ``io_module`` is not supported, if
        ``toyz_module`` cannot be imported, or if a ``KeyError`` occurs while
        looking up or running the load function of another module
    """
    # Make a copy of the file_options to use
    file_options = core.merge_dict({}, file_options)
    # Ignore parameters for other functions like 'save'
    module = get_io_module(toyz_module, io_module)
    params = module[file_type]['load']
    # Build a list of the allowed names: dict key views cannot be added together
    # on Python 3
    load_options = list(params['params'])
    if 'optional' in params:
        load_options += list(params['optional'])
    file_options = {k: v for k, v in file_options.items() if k in load_options}
    print('keys', params.keys())
    print('file_options', file_options)
    if toyz_module == 'toyz':
        print("in toyz")
        if io_module == 'python':
            print('in python')
            # 'sep' and 'use_cols' are not arguments of open(), so remove them
            # from the options ('use_cols' is currently not used for reading)
            sep = file_options.pop('sep')
            file_options.pop('use_cols')
            data = []
            # The context manager closes the file once it is read (or on error)
            with open(**file_options) as f:
                if file_type == 'csv':
                    for line in f:
                        no_cr = line.split('\n')[0]
                        data.append(no_cr.split(sep))
                else:
                    raise ToyzIoError(
                        "Invalid file type '{0}' for python open file".format(
                            file_type))
        elif io_module == 'numpy':
            import numpy as np
            data = np.load(**file_options)
        elif io_module == 'pandas':
            import pandas as pd
            if file_type == 'csv':
                if 'dtype' in file_options:
                    file_options['dtype'] = load_dict(file_options['dtype'],
                                                      True)
                if 'header' in file_options:
                    file_options['header'] = load_list(file_options['header'],
                                                       True)
                if 'skiprows' in file_options:
                    file_options['skiprows'] = load_list(
                        file_options['skiprows'], True)
                if 'names' in file_options:
                    file_options['names'] = load_list(file_options['names'],
                                                      False)
                if 'na_values' in file_options:
                    file_options['na_values'] = load_unknown(
                        file_options['na_values'], False)
                if 'true_values' in file_options:
                    file_options['true_values'] = load_list(
                        file_options['true_values'], False)
                if 'false_values' in file_options:
                    file_options['false_values'] = load_list(
                        file_options['false_values'], False)
                if 'date_parser' in file_options:
                    import importlib
                    # 'package.module.function' -> import 'package.module' and
                    # fetch 'function' from it (getattr needs a string name,
                    # not the list produced by split)
                    parser_module, _, parser_name = file_options[
                        'date_parser'].rpartition('.')
                    file_options['date_parser'] = getattr(
                        importlib.import_module(parser_module), parser_name)
                if 'usecols' in file_options:
                    file_options['usecols'] = load_list(
                        file_options['usecols'], False)
                df = pd.read_csv(**file_options)
            elif file_type == 'hdf':
                if 'columns' in file_options:
                    file_options['columns'] = load_list(
                        file_options['columns'], False)

                df = pd.read_hdf(**file_options)
            elif file_type == 'sql':
                from sqlalchemy import create_engine
                print('file options', file_options)
                engine = create_engine(file_options['connection'])
                sql = file_options['sql']
                del file_options['connection']
                del file_options['sql']
                df = pd.read_sql(sql, engine, **file_options)
            else:
                raise ToyzIoError("File type is not yet supported")
            data = df
        else:
            # Fail with a clear error (same message as save_data) instead of
            # reaching the return statement with ``data`` undefined
            raise ToyzIoError("'" + io_module +
                              "' is not currently supported by Toyz. "
                              "You may need to import an affiliated module")
    else:
        import importlib
        try:
            module = importlib.import_module(toyz_module)
        except ImportError:
            raise ToyzIoError("Could not import module '" + toyz_module + "'")
        try:
            load_fn = module.config.io_modules[io_module][file_type]['load_fn']
            data = module.config.load_functions[load_fn](file_type,
                                                         **file_options)
        except KeyError:
            raise ToyzIoError("Could not find " + io_module + " in " +
                              toyz_module + " load_functions")
    return data
Ejemplo n.º 12
0
Archivo: io.py Proyecto: fred3m/toyz
def save_data(data, toyz_module, io_module, file_type, file_options):
    """
    Save data to a file
    
    Parameters
        data ( *object* ):
            - data object to save
        toyz_module ( *string* ):
            - name of toyz module to use for i/o
        io_module ( *string* ):
            - name of python module to use for i/o
        file_type ( *string* ): 
            - type of file to open (for example 'hdf', 'csv', 'npy', etc.)
            - *Note*: the ``file_type`` must be supported by the given ``io_module``
        file_options ( *dict* ): 
            - dictionary of options as specified in the ``io_module``s documentation

    Returns
        - the result of ``convert_options(..., 'save2load')`` applied to the
          options that were used to save

    Raises
        ToyzIoError: if ``io_module`` is not handled for the 'toyz' module, if
        ``toyz_module`` cannot be imported, or if a ``KeyError`` occurs while
        looking up or running the save function of another module
    """
    # Ignore parameters for other functions like 'load'
    module = get_io_module(toyz_module, io_module)
    params = module[file_type]['save']
    # Build a list of the allowed names: dict key views cannot be added together
    # on Python 3, and 'optional' may be missing (load_data allows for that too)
    allowed_options = list(params['params']) + list(params.get('optional', {}))
    file_options = {k: v for k, v in file_options.items() if k in allowed_options}
    if toyz_module == 'toyz':
        if io_module == 'python':
            if '+' not in file_options['mode'] and 'w' not in file_options['mode']:
                file_options['mode'] = file_options['mode'] + '+'
            # 'columns' is written to the file, it is not an argument of open()
            columns = file_options.get('columns') if 'columns' in file_options else None
            open_options = {k: v for k, v in file_options.items() if k != 'columns'}
            # The context manager closes the file even if a write fails
            with open(**open_options) as f:
                if columns is not None:
                    f.write(columns)
                f.write(data)
        elif io_module == 'numpy':
            import numpy as np
            np.save(file_options['file'], data)
        elif io_module == 'pandas':
            if file_type == 'csv':
                if 'columns' in file_options:
                    file_options['columns'] = load_list(file_options['columns'], False)
                print('saving', file_options)
                print('data', data)
                if 'index' not in file_options:
                    file_options['index'] = False
                data.to_csv(**file_options)
            elif file_type == 'hdf':
                data.to_hdf(**file_options)
            elif file_type == 'sql':
                data.to_sql(**file_options)
        else:
            raise ToyzIoError(
                "'"+io_module+"' is not currently supported by Toyz. "
                "You may need to import an affiliated module")
    else:
        import importlib
        try:
            module = importlib.import_module(toyz_module)
        except ImportError:
            raise ToyzIoError("Could not import module '"+toyz_module+"'")
        try:
            save_fn = module.config.io_modules[io_module][file_type]['save_fn']
            module.config.save_functions[save_fn](data, file_type, **file_options)
        except KeyError:
            raise ToyzIoError("Could not find "+io_module+" in "+toyz_module+" save_functions")
    return convert_options(toyz_module, io_module, file_type, file_options, 'save2load')
Ejemplo n.º 13
0
Archivo: io.py Proyecto: fred3m/toyz
def load_data(toyz_module, io_module, file_type, file_options):
    """
    Loads a data file using a specified python module and a set of options.
    
    Parameters
        toyz_module ( *string* ):
            - name of toyz module to use for i/o
        io_module ( *string* ):
            - name of python module to use for i/o
        file_type ( *string* ): 
            - type of file to open (for example 'hdf', 'csv', 'npy', etc.)
            - *Note*: the ``file_type`` must be supported by the given ``io_module``
        file_options ( *dict* ): 
            - dictionary of options as specified in the ``io_module``s documentation

    Returns
        - the loaded data: a list of rows (lists of strings) for the 'python'
          module, otherwise whatever the numpy, pandas or custom load function returns

    Raises
        ToyzIoError: if the file type or ``io_module`` is not supported, if
        ``toyz_module`` cannot be imported, or if a ``KeyError`` occurs while
        looking up or running the load function of another module
    """
    # Make a copy of the file_options to use
    file_options = core.merge_dict({}, file_options)
    # Ignore parameters for other functions like 'save'
    module = get_io_module(toyz_module, io_module)
    params = module[file_type]['load']
    # Build a list of the allowed names: dict key views cannot be added together
    # on Python 3
    load_options = list(params['params'])
    if 'optional' in params:
        load_options += list(params['optional'])
    file_options = {k: v for k, v in file_options.items() if k in load_options}
    print('keys', params.keys())
    print('file_options', file_options)
    if toyz_module == 'toyz':
        print("in toyz")
        if io_module == 'python':
            print('in python')
            # 'sep' and 'use_cols' are not arguments of open(), so remove them
            # from the options ('use_cols' is currently not used for reading)
            sep = file_options.pop('sep')
            file_options.pop('use_cols')
            data = []
            # The context manager closes the file once it is read (or on error)
            with open(**file_options) as f:
                if file_type == 'csv':
                    for line in f:
                        no_cr = line.split('\n')[0]
                        data.append(no_cr.split(sep))
                else:
                    raise ToyzIoError("Invalid file type '{0}' for python open file".format(file_type))
        elif io_module == 'numpy':
            import numpy as np
            data = np.load(**file_options)
        elif io_module == 'pandas':
            import pandas as pd
            if file_type == 'csv':
                if 'dtype' in file_options:
                    file_options['dtype'] = load_dict(file_options['dtype'], True)
                if 'header' in file_options:
                    file_options['header'] = load_list(file_options['header'], True)
                if 'skiprows' in file_options:
                    file_options['skiprows'] = load_list(file_options['skiprows'], True)
                if 'names' in file_options:
                    file_options['names'] = load_list(file_options['names'], False)
                if 'na_values' in file_options:
                    file_options['na_values'] = load_unknown(file_options['na_values'], False)
                if 'true_values' in file_options:
                    file_options['true_values'] = load_list(file_options['true_values'], False)
                if 'false_values' in file_options:
                    file_options['false_values'] = load_list(file_options['false_values'], False)
                if 'date_parser' in file_options:
                    import importlib
                    # 'package.module.function' -> import 'package.module' and
                    # fetch 'function' from it (getattr needs a string name,
                    # not the list produced by split)
                    parser_module, _, parser_name = file_options['date_parser'].rpartition('.')
                    file_options['date_parser'] = getattr(
                        importlib.import_module(parser_module), parser_name)
                if 'usecols' in file_options:
                    file_options['usecols'] = load_list(file_options['usecols'], False)
                df = pd.read_csv(**file_options)
            elif file_type == 'hdf':
                if 'columns' in file_options:
                    file_options['columns'] = load_list(file_options['columns'], False)
                
                df = pd.read_hdf(**file_options)
            elif file_type == 'sql':
                from sqlalchemy import create_engine
                print('file options', file_options)
                engine = create_engine(file_options['connection'])
                sql = file_options['sql']
                del file_options['connection']
                del file_options['sql']
                df = pd.read_sql(sql, engine, **file_options)
            else:
                raise ToyzIoError("File type is not yet supported")
            data = df
        else:
            # Fail with a clear error (same message as save_data) instead of
            # reaching the return statement with ``data`` undefined
            raise ToyzIoError(
                "'"+io_module+"' is not currently supported by Toyz. "
                "You may need to import an affiliated module")
    else:
        import importlib
        try:
            module = importlib.import_module(toyz_module)
        except ImportError:
            raise ToyzIoError("Could not import module '"+toyz_module+"'")
        try:
            load_fn = module.config.io_modules[io_module][file_type]['load_fn']
            data = module.config.load_functions[load_fn](file_type, **file_options)
        except KeyError:
            raise ToyzIoError("Could not find "+io_module+" in "+toyz_module+" load_functions")
    return data