# Assumed scaffolding (import paths hypothetical) so the excerpted test
# methods below are runnable:
import os
import platform

import pytest

from syncopy.shared.errors import SPYValueError  # assumed location
from syncopy.shared.parsers import filename_parser  # assumed location


class TestFilenameParser:

    def test_invalid_ext(self):
        # wrong extension
        with pytest.raises(SPYValueError):
            filename_parser("test.wrongExtension")

        # no extension
        with pytest.raises(SPYValueError):
            filename_parser("test")

    def test_invalid_spy_container(self):
        fname = "sessionName/sessionName_testTag.analog"
        with pytest.raises(SPYValueError):
            filename_parser(fname, is_in_valid_container=True)

        fname = "wrongContainer.spy/sessionName_testTag.analog"
        with pytest.raises(SPYValueError):
            filename_parser(fname, is_in_valid_container=True)

    def test_folder_only(self):
        assert filename_parser("container.spy") == {
            'filename': None,
            'container': 'container.spy',
            'folder': os.getcwd(),
            'tag': None,
            'basename': 'container',
            'extension': '.spy'
        }
        folder = "{}/tmp".format("C:" if platform.system() == "Windows" else "")
        assert filename_parser("/tmp/container.spy") == {
            'filename': None,
            'container': 'container.spy',
            'folder': os.path.normpath(folder),
            'tag': None,
            'basename': 'container',
            'extension': '.spy'
        }

    def test_valid_spy_container(self):
        fname = "sessionName.spy/sessionName_testTag.analog"
        assert filename_parser(fname, is_in_valid_container=True) == {
            "filename": "sessionName_testTag.analog",
            "container": "sessionName.spy",
            "folder": os.path.join(os.getcwd(), "sessionName.spy"),
            "tag": "testTag",
            "basename": "sessionName",
            "extension": ".analog"
        }

    def test_with_info_ext(self):
        fname = "sessionName_testTag.analog.info"
        assert filename_parser(fname) == {
            "filename": fname.replace(".info", ""),
            "container": None,
            "folder": os.getcwd(),
            "tag": None,
            "basename": "sessionName_testTag",
            "extension": ".analog"
        }

    def test_fname_only(self):
        fname = "sessionName_testTag.analog"
        assert filename_parser(fname) == {
            "filename": fname,
            "container": None,
            "folder": os.getcwd(),
            "tag": None,
            "basename": "sessionName_testTag",
            "extension": ".analog"
        }

    def test_with_full_path(self):
        fname = os.path.normpath("/tmp/sessionName.spy/sessionName_testTag.analog")
        folder = "{}/tmp".format("C:" if platform.system() == "Windows" else "")
        assert filename_parser(fname, is_in_valid_container=True) == {
            "filename": "sessionName_testTag.analog",
            "container": "sessionName.spy",
            "folder": os.path.join(os.path.normpath(folder), "sessionName.spy"),
            "tag": "testTag",
            "basename": "sessionName",
            "extension": ".analog"
        }

    def test_none(self):
        assert all(value is None for value in filename_parser(None).values())


# Example #9 (excerpt from Syncopy's I/O module; relies on module-level
# imports not shown here, e.g. os, sys, json, numpy as np, h5py, and
# Syncopy-internal helpers such as filename_parser and the SPY* errors)
def save(out,
         container=None,
         tag=None,
         filename=None,
         overwrite=False,
         memuse=100):
    r"""Save Syncopy data object to disk

    The underlying array data object is stored in a HDF5 file, the metadata in
    a JSON file. Both can be placed inside a Syncopy container, which is a
    regular directory with the extension '.spy'. 

    Parameters
    ----------
    out : Syncopy data object
        Object to be stored on disk.    
    container : str
        Path to Syncopy container folder (\*.spy) to be used for saving. If
        the extension '.spy' is omitted, it will be added to the folder name.
    tag : str
        Tag to be appended to the container basename.
    filename : str
        Explicit path to the data file. This is only necessary if the data
        should not be part of a container folder. An extension
        (\*.<dataclass>) is added if omitted. The `tag` argument is ignored.
    overwrite : bool
        If `True`, an existing HDF5 file and its accompanying JSON file are
        overwritten (without prompt).
    memuse : scalar
        Approximate in-memory cache size (in MB) for writing data to disk
        (only relevant for :class:`syncopy.VirtualData` or memory map data
        sources)
        
    Returns
    -------
    Nothing : None
    
    Notes
    -----
    Syncopy objects may also be saved using the class method ``.save`` that 
    acts as a wrapper for :func:`syncopy.save`, e.g., 
    
    >>> save(obj, container="new_spy_container")
    
    is equivalent to
    
    >>> obj.save(container="new_spy_container")
    
    However, once a Syncopy object has been saved, the class method ``.save``
    can be used as a shortcut to quick-save recent changes, e.g., 
    
    >>> obj.save()
    
    writes the current state of `obj` to the data/meta-data files on-disk 
    associated with `obj` (overwriting both in the process). Similarly, 
    
    >>> obj.save(tag='newtag')
    
    saves `obj` in the current container 'new_spy_container' under a different 
    tag. 

    Examples
    -------- 
    Save the Syncopy data object `obj` on disk in the current working directory
    without creating a spy-container
    
    >>> spy.save(obj, filename="session1")
    >>> # --> os.getcwd()/session1.<dataclass>
    >>> # --> os.getcwd()/session1.<dataclass>.info
    
    Save `obj` without creating a spy-container using an absolute path

    >>> spy.save(obj, filename="/tmp/session1")
    >>> # --> /tmp/session1.<dataclass>
    >>> # --> /tmp/session1.<dataclass>.info
    
    Save `obj` in a new spy-container created in the current working directory

    >>> spy.save(obj, container="container.spy")
    >>> # --> os.getcwd()/container.spy/container.<dataclass>
    >>> # --> os.getcwd()/container.spy/container.<dataclass>.info

    Save `obj` in a new spy-container created by providing an absolute path

    >>> spy.save(obj, container="/tmp/container.spy")
    >>> # --> /tmp/container.spy/container.<dataclass>
    >>> # --> /tmp/container.spy/container.<dataclass>.info

    Save `obj` in a new (or existing) spy-container under a different tag
    
    >>> spy.save(obj, container="session1.spy", tag="someTag")
    >>> # --> os.getcwd()/session1.spy/session1_someTag.<dataclass>
    >>> # --> os.getcwd()/session1.spy/session1_someTag.<dataclass>.info

    See also
    --------
    syncopy.load : load data created with :func:`syncopy.save`
    """

    # Make sure `out` is a valid Syncopy data object
    data_parser(out, varname="out", writable=None, empty=False)

    if filename is None and container is None:
        raise SPYError('filename and container cannot both be `None`')

    if container is not None and filename is None:
        # construct filename from container name
        if not isinstance(container, str):
            raise SPYTypeError(container, varname="container", expected="str")
        if not os.path.splitext(container)[1] == ".spy":
            container += ".spy"
        fileInfo = filename_parser(container)
        filename = os.path.join(fileInfo["folder"], fileInfo["container"],
                                fileInfo["basename"])
        # handle tag
        if tag is not None:
            if not isinstance(tag, str):
                raise SPYTypeError(tag, varname="tag", expected="str")
            filename += '_' + tag
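        # e.g. container="session1.spy" with tag="someTag" yields
        # <folder>/session1.spy/session1_someTag as the filename stem
        # (cf. the docstring examples above)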

    elif container is not None and filename is not None:
        raise SPYError(
            "container and filename cannot be used at the same time")

    if not isinstance(filename, str):
        raise SPYTypeError(filename, varname="filename", expected="str")

    # add extension if the filename does not already have one
    if not os.path.splitext(filename)[1]:
        filename += out._classname_to_extension()
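    # e.g. filename="session1" for an AnalogData object becomes "session1.analog"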

    scalar_parser(memuse, varname="memuse", lims=[0, np.inf])

    if not isinstance(overwrite, bool):
        raise SPYTypeError(overwrite, varname="overwrite", expected="bool")

    # Parse filename for validity and construct full path to HDF5 file
    fileInfo = filename_parser(filename)
    if fileInfo["extension"] != out._classname_to_extension():
        raise SPYError("""Extension in filename ({ext}) does not match data 
                    class ({dclass})""".format(ext=fileInfo["extension"],
                                               dclass=out.__class__.__name__))
    dataFile = os.path.join(fileInfo["folder"], fileInfo["filename"])

    # If `out` is to replace its own on-disk representation, be more careful
    replace = overwrite and dataFile == out.filename

    # Prevent `out` from trying to re-create its own data file
    if replace:
        out.data.flush()
        h5f = out.data.file
        dat = out.data
        trl = h5f["trialdefinition"]
    else:
        if not os.path.exists(fileInfo["folder"]):
            try:
                os.makedirs(fileInfo["folder"])
            except IOError:
                raise SPYIOError(fileInfo["folder"])
        else:
            if os.path.exists(dataFile):
                if not os.path.isfile(dataFile):
                    raise SPYIOError(dataFile)
                if overwrite:
                    try:
                        h5f = h5py.File(dataFile, mode="w")
                        h5f.close()
                    except Exception as exc:
                        msg = "Cannot overwrite {} - file may still be open. "
                        msg += "Original error message below\n{}"
                        raise SPYError(msg.format(dataFile, str(exc)))
                else:
                    raise SPYIOError(dataFile, exists=True)
        h5f = h5py.File(dataFile, mode="w")

        # Save each member of `_hdfFileDatasetProperties` in target HDF file
        for datasetName in out._hdfFileDatasetProperties:
            dataset = getattr(out, datasetName)

            # Member is a memory map
            # Member is a memory map
            if isinstance(dataset, np.memmap):
                # Given the memory cap, compute how many data blocks can be
                # grabbed per swipe (divide by 2 since we're working with an
                # additional temporary array); use a local byte budget so
                # `memuse` is not re-scaled on every loop iteration
                memuseBytes = memuse * 1024**2 / 2
                nrow = int(memuseBytes /
                           (np.prod(dataset.shape[1:]) * dataset.dtype.itemsize))
                rem = int(dataset.shape[0] % nrow)
                n_blocks = [nrow] * int(dataset.shape[0] // nrow) + [rem] * int(rem > 0)
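                # Worked example (hypothetical sizes): with memuse=100 and a
                # float64 memmap of shape (1_000_000, 16), memuseBytes is
                # 100 * 1024**2 / 2 = 52_428_800, one row occupies
                # 16 * 8 = 128 bytes, hence nrow = 409_600 and n_blocks holds
                # two full blocks plus a 180_800-row remainder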

                # Write data block-wise to dataset (use `clear` to wipe blocks of
                # mem-maps from memory)
                dat = h5f.create_dataset(datasetName,
                                         dtype=dataset.dtype,
                                         shape=dataset.shape)
                for m, M in enumerate(n_blocks):
                    dat[m * nrow:m * nrow +
                        M, :] = out.data[m * nrow:m * nrow + M, :]
                    out.clear()

            # Member is a HDF5 dataset
            else:
                dat = h5f.create_dataset(datasetName, data=dataset)

    # Now write trial-related information
    trl_arr = np.array(out.trialdefinition)
    if replace:
        trl[()] = trl_arr
        trl.flush()
    else:
        trl = h5f.create_dataset("trialdefinition",
                                 data=trl_arr,
                                 maxshape=(None, trl_arr.shape[1]))
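        # maxshape=(None, ncol) keeps the first axis resizable, so further
        # trials can be appended to "trialdefinition" without rewriting it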

    # Write to log already here so that the entry can be exported to json
    infoFile = dataFile + FILE_EXT["info"]
    out.log = "Wrote files " + dataFile + "\n\t\t\t" + 2 * " " + infoFile

    # While we're at it, write cfg entries
    out.cfg = {
        "method": sys._getframe().f_code.co_name,
        "files": [dataFile, infoFile]
    }

    # Assemble dict for JSON output: order things by their "readability"
    outDict = OrderedDict(startInfoDict)
    outDict["filename"] = fileInfo["filename"]
    outDict["dataclass"] = out.__class__.__name__
    outDict["data_dtype"] = dat.dtype.name
    outDict["data_shape"] = dat.shape
    outDict["data_offset"] = dat.id.get_offset()
    outDict["trl_dtype"] = trl.dtype.name
    outDict["trl_shape"] = trl.shape
    outDict["trl_offset"] = trl.id.get_offset()
    # record the array memory layout ("C" also covers the HDF5-dataset case)
    if isinstance(out.data, np.ndarray) and np.isfortran(out.data):
        outDict["order"] = "F"
    else:
        outDict["order"] = "C"

    for key in out._infoFileProperties:
        value = getattr(out, key)
        if isinstance(value, np.ndarray):
            value = value.tolist()
        # potentially nested dicts
        elif isinstance(value, dict):
            value = dict(value)
            _dict_converter(value)
        outDict[key] = value

    # Save relevant stuff as HDF5 attributes
    for key in out._hdfFileAttributeProperties:
        if outDict[key] is None:
            h5f.attrs[key] = "None"
        else:
            try:
                h5f.attrs[key] = outDict[key]
            except RuntimeError:
                msg = ("Too many entries in `{}` - truncating HDF5 attribute. "
                       "Please refer to {} for complete listing.")
                info_fle = os.path.join(
                    os.path.split(os.path.split(infoFile)[0])[1],
                    os.path.basename(infoFile))
                SPYWarning(msg.format(key, info_fle))
                h5f.attrs[key] = [outDict[key][0], "...", outDict[key][-1]]

    # Re-assign filename after saving (and remove source in case it came from `__storage__`)
    if not replace:
        h5f.close()
        if __storage__ in out.filename:
            out.data.file.close()
            os.unlink(out.filename)
        out.data = dataFile

    # Compute checksum and finally write JSON (automatically overwrites existing)
    outDict["file_checksum"] = hash_file(dataFile)

    with open(infoFile, 'w') as out_json:
        json.dump(outDict, out_json, indent=4)

    return
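
# Hedged round-trip sketch (not part of the original source; assumes a
# populated AnalogData object `adata` and simply mirrors the docstring
# examples above):
#
#     import syncopy as spy
#     spy.save(adata, container="/tmp/demo")  # -> /tmp/demo.spy/demo.analog (+ .info)
#     data = spy.load("/tmp/demo.spy")        # read the container contents back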


# Example #10
def load(filename,
         tag=None,
         dataclass=None,
         checksum=False,
         mode="r+",
         out=None):
    """
    Load Syncopy data object(s) from disk
    
    Either loads single files within or outside of '.spy'-containers or loads
    multiple objects from a single '.spy'-container. Loading from containers can 
    be further controlled by imposing restrictions on object class(es) (via 
    `dataclass`) and file-name tag(s) (via `tag`). 
    
    Parameters
    ----------
    filename : str
        Either path to a Syncopy container folder (\*.spy; if the extension is
        omitted, '.spy' will be appended) or name of a data or metadata file.
        If `filename` points to a container and no further specifications are
        provided, the entire contents of the container is loaded. Otherwise,
        specific objects may be selected using the `dataclass` or `tag`
        keywords (see below).
    tag : None or str or list
        If `filename` points to a container, `tag` may be used to filter objects
        by filename-`tag`. Multiple tags can be provided using a list, e.g., 
        ``tag = ['experiment1', 'experiment2']``. Can be combined with `dataclass`
        (see below). Invalid if `filename` points to a single file. 
    dataclass : None or str or list
        If provided, only objects of the given dataclass are loaded from disk.
        Available options are '.analog', '.spectral', '.spike' and '.event'
        (as listed in ``spy.FILE_EXT["data"]``). Multiple class specifications
        can be provided using a list, e.g., ``dataclass = ['.analog', '.spike']``.
        Can be combined with `tag` (see above) and is also valid if `filename`
        points to a single file (e.g., to ensure the loaded object is of a
        specific type).
    checksum : bool
        If `True`, checksum-matching is performed on loaded object(s) to ensure
        data-integrity (impairs performance particularly when loading large files). 
    mode : str
        Data access mode of loaded objects (can be 'r' for read-only, 'r+' or 'w'
        for read/write access). 
    out : Syncopy data object
        Empty object to be filled with data loaded from disk. Has to match the 
        type of the on-disk file (e.g., ``filename = 'mydata.analog'`` requires
        `out` to be a :class:`syncopy.AnalogData` object). Can only be used 
        when loading single objects from disk (`out` is ignored when multiple
        files are loaded from a container). 
        
    Returns
    -------
    Nothing : None
        If a single file is loaded and `out` was provided, `out` is filled with
        data loaded from disk, i.e., :func:`syncopy.load` does **not** create a 
        new object
    obj : Syncopy data object
        If a single file is loaded and `out` was `None`, :func:`syncopy.load` 
        returns a new object. 
    objdict : dict
        If multiple files are loaded, :func:`syncopy.load` creates a new object
        for each file and places them in a dictionary whose keys are the base-names
        (sans path) of the corresponding files. 
        
    Notes
    -----
    All of Syncopy's classes offer (limited) support for data loading upon
    object creation. Just as the class method ``.save`` can be used as a
    shortcut for :func:`syncopy.save`, Syncopy objects can be filled with data
    from Syncopy files directly upon creation, e.g.,
    
    >>> adata = spy.AnalogData('/path/to/session1.analog')
    
    creates a new :class:`syncopy.AnalogData` object and immediately fills it 
    with data loaded from the file "/path/to/session1.analog". 
    
    Since only one object can be created at a time, this loading shortcut only 
    supports single file specifications (i.e., ``spy.AnalogData("container.spy")``
    is invalid). 

    Examples
    -------- 
    Load all objects found in the spy-container "sessionName" (the extension ".spy" 
    may or may not be provided)
    
    >>> objectDict = spy.load("sessionName")
    >>> # --> returns a dict with base-filenames as keys
    
    Load all :class:`syncopy.AnalogData` and :class:`syncopy.SpectralData` objects
    from the spy-container "sessionName"
    
    >>> objectDict = spy.load("sessionName.spy", dataclass=['analog', 'spectral'])
    
    Load a specific :class:`syncopy.AnalogData` object from the above spy-container
    
    >>> obj = spy.load("sessionName.spy/sessionName_someTag.analog")
    
    This is equivalent to
    
    >>> obj = spy.AnalogData("sessionName.spy/sessionName_someTag.analog")
    
    If the "sessionName" spy-container only contains one object with the tag 
    "someTag", the above call is equivalent to
    
    >>> obj = spy.load("sessionName.spy", tag="someTag")
    
    If there are multiple objects of different types using the same tag "someTag",
    the above call can be further narrowed down to only load the requested 
    :class:`syncopy.AnalogData` object
       
    >>> obj = spy.load("sessionName.spy", tag="someTag", dataclass="analog")
    
    See also
    --------
    syncopy.save : save syncopy object on disk
    """

    # Ensure `filename` is either a valid .spy container or data file: if `filename`
    # is a directory w/o '.spy' extension, append it
    if not isinstance(filename, str):
        raise SPYTypeError(filename, varname="filename", expected="str")
    if not os.path.splitext(os.path.abspath(os.path.expanduser(filename)))[1]:
        filename += FILE_EXT["dir"]
    fileInfo = filename_parser(filename)

    if tag is not None:
        if isinstance(tag, str):
            tags = [tag]
        else:
            tags = tag
        array_parser(tags, varname="tag", ntype=str)
        if fileInfo["filename"] is not None:
            raise SPYError("Only containers can be loaded with `tag` keyword!")
        tags = ["*" + t + "*" for t in tags]
    else:
        tags = "*"
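    # e.g. tag="exp1" turns into the glob pattern "*exp1*" used below, while
    # tag=None matches any file name via "*"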

    # If `dataclass` was provided, format it for our needs (e.g. 'spike' -> ['.spike'])
    if dataclass is not None:
        if isinstance(dataclass, str):
            dataclass = [dataclass]
        array_parser(dataclass, varname="dataclass", ntype=str)
        dataclass = [
            "." + dclass if not dclass.startswith(".") else dclass
            for dclass in dataclass
        ]
        extensions = set(dataclass).intersection(FILE_EXT["data"])
        if len(extensions) == 0:
            lgl = "extension(s) '" + "or '".join(ext + "' "
                                                 for ext in FILE_EXT["data"])
            raise SPYValueError(legal=lgl,
                                varname="dataclass",
                                actual=str(dataclass))

    # Avoid any misunderstandings here...
    if not isinstance(checksum, bool):
        raise SPYTypeError(checksum, varname="checksum", expected="bool")

    # Abuse `AnalogData.mode`-setter to vet `mode`
    spd.AnalogData().mode = mode

    # If `filename` points to a spy container, `glob` what's inside, otherwise just load
    if fileInfo["filename"] is None:

        if dataclass is None:
            extensions = FILE_EXT["data"]
        container = os.path.join(fileInfo["folder"], fileInfo["container"])
        fileList = []
        for ext in extensions:
            for tag in tags:
                fileList.extend(glob(os.path.join(container, tag + ext)))
        if len(fileList) == 0:
            fsloc = os.path.join(container,
                                 "or ".join(tag + " " for tag in tags) +
                                 "with extensions " +
                                 "or ".join(ext + " " for ext in extensions))
            raise SPYIOError(fsloc, exists=False)
        if len(fileList) == 1:
            return _load(fileList[0], checksum, mode, out)
        if out is not None:
            msg = "When loading multiple objects, the `out` keyword is ignored"
            SPYWarning(msg)
        objectDict = {}
        for fname in fileList:
            obj = _load(fname, checksum, mode, None)
            objectDict[os.path.basename(obj.filename)] = obj
        return objectDict

    else:

        if dataclass is not None:
            if os.path.splitext(fileInfo["filename"])[1] not in dataclass:
                lgl = "extension '" + \
                    "or '".join(dclass + "' " for dclass in dataclass)
                raise SPYValueError(legal=lgl,
                                    varname="filename",
                                    actual=fileInfo["filename"])
        return _load(filename, checksum, mode, out)


# Example #11
def _load(filename, checksum, mode, out):
    """
    Local helper
    """

    fileInfo = filename_parser(filename)
    hdfFile = os.path.join(fileInfo["folder"], fileInfo["filename"])
    jsonFile = hdfFile + FILE_EXT["info"]

    io_parser(hdfFile, varname="hdfFile", isfile=True, exists=True)
    io_parser(jsonFile, varname="jsonFile", isfile=True, exists=True)

    with open(jsonFile, "r") as file:
        jsonDict = json.load(file)

    if "dataclass" not in jsonDict.keys():
        raise SPYError(
            "Info file {} does not contain a dataclass field".format(jsonFile))

    if hasattr(spd, jsonDict["dataclass"]):
        dataclass = getattr(spd, jsonDict["dataclass"])
    else:
        raise SPYError("Unknown data class {dclass}".format(
            dclass=jsonDict["dataclass"]))

    requiredFields = tuple(
        startInfoDict.keys()) + dataclass._infoFileProperties

    for key in requiredFields:
        if key not in jsonDict:
            raise SPYError(
                "Required field {field} for {cls} not in {file}".format(
                    field=key, cls=dataclass.__name__, file=jsonFile))

    # If `_hdr` is an empty list, set it to `None` to not confuse meta-functions
    hdr = jsonDict.get("_hdr")
    if isinstance(hdr, (list, np.ndarray)):
        if len(hdr) == 0:
            jsonDict["_hdr"] = None

    # FIXME: add version comparison (syncopy.__version__ vs jsonDict["_version"])

    # If wanted, perform checksum matching
    if checksum:
        hsh_msg = "hash = {hsh:s}"
        hsh = hash_file(hdfFile)
        if hsh != jsonDict["file_checksum"]:
            raise SPYValueError(
                legal=hsh_msg.format(hsh=jsonDict["file_checksum"]),
                varname=os.path.basename(hdfFile),
                actual=hsh_msg.format(hsh=hsh))

    # Parsing is done, create new or check provided object
    if out is not None:
        data_parser(out,
                    varname="out",
                    writable=True,
                    dataclass=jsonDict["dataclass"])
        new_out = False
    else:
        out = dataclass()
        new_out = True

    # First and foremost, assign dimensional information
    dimord = jsonDict.pop("dimord")
    out.dimord = dimord

    # Access data on disk (error checking is done by setters)
    out.mode = mode
    for datasetProperty in out._hdfFileDatasetProperties:
        setattr(out, datasetProperty,
                h5py.File(hdfFile, mode="r")[datasetProperty])

    # Abuse ``definetrial`` to set trial-related props
    trialdef = h5py.File(hdfFile, mode="r")["trialdefinition"][()]
    out.definetrial(trialdef)

    # Assign metadata
    for key in [
            prop for prop in dataclass._infoFileProperties if prop != "dimord"
    ]:
        setattr(out, key, jsonDict[key])

    # Write `cfg` entries
    thisMethod = sys._getframe().f_code.co_name.replace("_", "")
    out.cfg = {"method": thisMethod, "files": [hdfFile, jsonFile]}

    # Write log-entry
    msg = "Read files v. {ver:s} ".format(ver=jsonDict["_version"])
    msg += "{hdf:s}\n\t" + (len(msg) + len(thisMethod) + 2) * " " + "{json:s}"
    out.log = msg.format(hdf=hdfFile, json=jsonFile)

    # Happy breakdown
    return out if new_out else None