class Archive(object):
    '''
    Small in-memory archive holding a value and a bumpable version

    A new instance starts at version ``0.0.1`` with a 5x4 DataFrame of
    random numbers as its value.

    Example
    -------

    .. code-block:: python

        >>> arch = Archive('/climate/raw/rcp85.nc', indices=['rcp'])
        >>> arch.update(5)
        <Archive /climate/raw/rcp85.nc> bumped 0.0.1 --> 0.0.2
        >>> arch.update(3)
        <Archive /climate/raw/rcp85.nc> bumped 0.0.2 --> 0.0.3

    '''

    def __init__(self, name, indices):
        self.name, self.indices = name, indices
        self.version = BumpableVersion('0.0.1')
        self.value = pd.DataFrame(np.random.random((5, 4)))

    def __repr__(self):
        # Rendered as "<ClassName name>"
        return '<{} {}>'.format(type(self).__name__, self.name)

    def update(self, value):
        '''
        Replace the stored value, bump the patch version and report it
        '''
        self.value = value

        # Capture the version text before the in-place bump changes it
        previous = str(self.version)
        self.version.bump('patch', inplace=True)

        report = '{} bumped {} --> {}'.format(
            str(self), previous, self.version)
        print(report)

    @contextmanager
    def open(self, *args, **kwargs):
        '''
        Yield a bytes buffer containing the stored value written as CSV

        Positional and keyword arguments are accepted but not used.
        '''
        print('loading {}'.format(self))

        buf = io.BytesIO()
        self.value.to_csv(buf)

        # Rewind so the consumer reads from the start of the buffer
        buf.seek(0, io.SEEK_SET)
        yield buf
def get_version_hash(self, version=None):
    '''
    Look up the checksum recorded for a version of this archive

    Parameters
    ----------
    version : str or BumpableVersion, optional
        Version to look up. The value is first normalised by
        ``_process_version``.

    Returns
    -------
    The ``'checksum'`` entry of the first history record whose version
    equals the requested one. ``None`` if the archive is versioned and
    the normalised version is ``None``. For an unversioned archive, the
    result of ``get_latest_hash()``.

    Raises
    ------
    ValueError
        If the archive is versioned and no history record matches the
        requested version.
    '''
    target = _process_version(self, version)

    if not self.versioned:
        return self.get_latest_hash()

    if target is None:
        return None

    for record in self.get_history():
        if BumpableVersion(record['version']) == target:
            return record['checksum']

    raise ValueError(
        'Version "{}" not found in archive history'.format(target))
def get_dependencies(self, version=None):
    '''
    Return the dependencies recorded for one version of the archive

    Parameters
    ----------
    version: str
        string representing the version number whose dependencies are
        being looked up. The value is first normalised by
        ``_process_version``.

    Returns
    -------
    The ``'dependencies'`` entry of the matching history record. The
    history is scanned from its last record to its first, so the last
    matching record wins.

    Raises
    ------
    ValueError
        If no history record matches the requested version.
    '''
    target = _process_version(self, version)

    for entry in reversed(self.get_history()):
        if BumpableVersion(entry['version']) == target:
            break
    else:
        # Loop ran to completion without finding a matching record
        raise ValueError('Version {} not found'.format(target))

    return entry['dependencies']
def _process_version(self, version):
    '''
    Normalise a user-supplied version specifier for an archive

    Parameters
    ----------
    self
        Archive-like object providing ``versioned``,
        ``get_default_version()`` and ``get_latest_version()``.
    version : None, str or BumpableVersion
        ``None`` selects the archive's default version, the string
        ``'latest'`` selects the latest version, any other string is
        parsed with ``BumpableVersion``, and a ``BumpableVersion``
        instance is returned unchanged.

    Returns
    -------
    ``None`` for an unversioned archive; otherwise the value described
    above (``get_default_version()`` / ``get_latest_version()`` results
    are returned as-is).

    Raises
    ------
    ValueError
        If a version is given for an unversioned archive.
    TypeError
        If ``version`` is not ``None``, a string or a
        ``BumpableVersion``.
    '''
    if not self.versioned:
        if version is None:
            return None
        raise ValueError('Cannot specify version on an unversioned archive.')

    if version is None:
        return self.get_default_version()

    if isinstance(version, BumpableVersion):
        return version

    if isinstance(version, string_types):
        if version == 'latest':
            return self.get_latest_version()
        return BumpableVersion(version)

    # Previously an unsupported type fell off the end of the elif chain
    # and returned None, which callers interpret as "no version given".
    # Fail loudly instead of silently selecting a different version.
    raise TypeError(
        'Version must be None, a string or a BumpableVersion, '
        'got {}'.format(type(version).__name__))
def __init__(self, name, indices):
    '''
    Store the name and indices and set up the initial version and value

    Parameters
    ----------
    name
        Stored unchanged on ``self.name``.
    indices
        Stored unchanged on ``self.indices``.
    '''
    self.name, self.indices = name, indices

    # Every instance starts at version 0.0.1
    self.version = BumpableVersion('0.0.1')

    # Initial value: a 5x4 DataFrame of random numbers
    random_block = np.random.random((5, 4))
    self.value = pd.DataFrame(random_block)
def get_local_path(
        self,
        version=None,
        bumpversion=None,
        prerelease=None,
        dependencies=None,
        metadata=None):
    '''
    Returns a local path for read/write

    This is a generator function that yields exactly once.
    NOTE(review): presumably wrapped by a context-manager decorator
    applied outside this block -- confirm at the definition site.

    Parameters
    ----------
    version : str
        Version number of the file to retrieve (default latest)

    bumpversion : str
        Version component to update on write if archive is versioned.
        Valid bumpversion values are 'major', 'minor', and 'patch',
        representing the three components of the strict version
        numbering system (e.g. "1.2.3"). Either bumpversion or
        prerelease (or both) must be a non-None value. If the archive
        is not versioned, bumpversion is ignored.

    prerelease : str
        Prerelease component of archive version to update on write if
        archive is versioned. Valid prerelease values are 'alpha' and
        'beta'. Either bumpversion or prerelease (or both) must be a
        non-None value. If the archive is not versioned, prerelease is
        ignored.

    dependencies
        Recorded as the ``dependencies`` entry of the version metadata
        passed to ``_update_manager`` when the file is written.

    metadata : dict
        Updates to archive metadata. Pass {key: None} to remove a key
        from the archive's metadata.

    Yields
    ------
    The object produced by entering the context manager returned by
    ``data_file.get_local_path``.

    Raises
    ------
    AssertionError
        If the archive is versioned and the bumped version is not
        greater than the latest version.
    '''
    if metadata is None:
        metadata = {}

    latest_version = self.get_latest_version()
    version = _process_version(self, version)
    version_hash = self.get_version_hash(version)

    if self.versioned:
        if latest_version is None:
            latest_version = BumpableVersion()

        next_version = latest_version.bump(
            kind=bumpversion,
            prerelease=prerelease,
            inplace=False)

        # Raised explicitly instead of via an ``assert`` statement so the
        # check still runs under ``python -O``; otherwise an un-bumped
        # write would target the latest version's own path. The exception
        # type is kept as AssertionError for existing callers.
        if not next_version > latest_version:
            raise AssertionError(
                "Version must be bumped on write. "
                "Provide bumpversion and/or prerelease.")

        read_path = self.get_version_path(version)
        write_path = self.get_version_path(next_version)
    else:
        read_path = self.archive_path
        write_path = self.archive_path
        next_version = None

    # version_check returns true if fp's hash is current as of read
    def version_check(chk):
        return chk['checksum'] == version_hash

    # Updater updates the manager with the latest version number
    def updater(checksum, algorithm):
        self._update_manager(
            archive_metadata=metadata,
            version_metadata=dict(
                version=next_version,
                dependencies=dependencies,
                checksum=checksum,
                algorithm=algorithm))

    path = data_file.get_local_path(
        self.authority,
        self.api.cache,
        updater,
        version_check,
        self.api.hash_file,
        read_path,
        write_path)

    with path as fp:
        yield fp
def update(
        self,
        filepath,
        cache=False,
        remove=False,
        bumpversion=None,
        prerelease=None,
        dependencies=None,
        metadata=None):
    '''
    Enter a new version to a DataArchive

    If the file's checksum equals the archive's latest hash, only the
    archive metadata is updated and nothing is uploaded.

    Parameters
    ----------
    filepath : str
        The path to the file on your local file system

    cache : bool
        Turn on caching for this archive if not already on before
        update

    remove : bool
        If true and the file is unchanged, the local file is deleted;
        otherwise the flag is forwarded to the final upload call.

    bumpversion : str
        Version component to update on write if archive is versioned.
        Valid bumpversion values are 'major', 'minor', and 'patch',
        representing the three components of the strict version
        numbering system (e.g. "1.2.3"). If bumpversion is None the
        version number is not updated on write. Either bumpversion or
        prerelease (or both) must be a non-None value. If the archive
        is not versioned, bumpversion is ignored.

    prerelease : str
        Prerelease component of archive version to update on write if
        archive is versioned. Valid prerelease values are 'alpha' and
        'beta'. Either bumpversion or prerelease (or both) must be a
        non-None value. If the archive is not versioned, prerelease is
        ignored.

    dependencies
        Recorded as the ``dependencies`` entry of the version metadata
        passed to ``_update_manager``.

    metadata : dict
        Updates to archive metadata. Pass {key: None} to remove a key
        from the archive's metadata.

    Returns
    -------
    None
    '''
    archive_metadata = {} if metadata is None else metadata

    latest_version = self.get_latest_version()

    file_hash = self.api.hash_file(filepath)
    checksum = file_hash['checksum']
    algorithm = file_hash['algorithm']

    if checksum == self.get_latest_hash():
        # Same contents as the latest version: refresh metadata only
        self.update_metadata(archive_metadata)

        if remove and os.path.isfile(filepath):
            os.remove(filepath)

        return

    next_version = None
    if self.versioned:
        if latest_version is None:
            base_version = BumpableVersion()
        else:
            base_version = latest_version

        next_version = base_version.bump(
            kind=bumpversion,
            prerelease=prerelease,
            inplace=False)

    next_path = self.get_version_path(next_version)

    if cache:
        self.cache(next_version)

    if not self.is_cached(next_version):
        self.authority.upload(filepath, next_path, remove=remove)
    else:
        # Upload to the authority first and keep the local file so the
        # cache upload can still read it; the cache upload honours
        # ``remove``
        self.authority.upload(filepath, next_path)
        self.api.cache.upload(filepath, next_path, remove=remove)

    self._update_manager(
        archive_metadata=archive_metadata,
        version_metadata={
            'checksum': checksum,
            'algorithm': algorithm,
            'version': next_version,
            'dependencies': dependencies,
        })