def id2path(id):
    """Turn an identifier into a '/'-separated path of two-character parts.

    The identifier is first passed through ``id_encode``; the encoded text
    is then cut into consecutive chunks of at most two characters (the last
    chunk holds a single character when the encoded length is odd) and the
    chunks are joined with '/'.

    Returns an empty string when the encoded identifier is empty.
    """
    encoded = id_encode(id)
    # Stepping by two over the encoded text yields the same chunks as
    # repeatedly peeling two characters off the front.
    segments = [encoded[start:start + 2] for start in range(0, len(encoded), 2)]
    return '/'.join(segments)
def set_version_date(self, version, date):
    """Store ``date`` as the date of an already-registered ``version``.

    When ``version`` is listed in ``self.manifest['versions']`` the date is
    written to ``self.manifest['version_dates']`` and a bytestream named
    ``3=<id_encode(date)>`` containing ``date`` is added under the
    ``__<version>`` directory via ``self.po``.

    Returns True on success; logs an error and returns False when the
    version is not registered.
    """
    if version not in self.manifest['versions']:
        logger.error("Version %s does not exist" % version)
        return False

    self.manifest['version_dates'][version] = date
    version_dir = "__" + str(version)
    marker_name = "3=%s" % id_encode(date)
    self.po.add_bytestream_by_path(os.path.join(version_dir, marker_name), date)
    return True
# _setup_version_dir(self, version, date=None)
#
# Registers `version` in self.manifest and writes its marker bytestreams.
#
# Steps visible in the statement sequence below:
#   1. When `date` is falsy, fall back to self.manifest['date'] if that is
#      truthy, otherwise to datetime.now().isoformat().
#   2. When `version` is not yet in self.manifest['versions'], append it.
#   3. Set self.manifest['version_dates'][version] to the date and set the
#      'subdir', 'metadata_files', 'files' and 'versionlog' entries for this
#      version to fresh empty lists.
#   4. Call self.set_version_date(version, date).
#   5. Add a bytestream named "4=<id_encode(self.item_id)>" with content
#      self.item_id under the "__<version>" directory via self.po.
#
# NOTE(review): this definition has been flattened onto one line, so the
# original indentation is lost. It is not possible to tell from here whether
# steps 3-5 run only for a newly added version or on every call (which would
# reset the per-version lists of an existing version) -- confirm against the
# original file before relying on either reading.
# NOTE(review): the `else:` after the first assignment presumably pairs with
# `if self.manifest['date']`, not with `if not date` -- confirm.
def _setup_version_dir(self, version, date=None): if not date: if self.manifest['date']: date = self.manifest['date'] else: date = datetime.now().isoformat() if version not in self.manifest['versions']: self.manifest['versions'].append(version) self.manifest['version_dates'][version] = date self.manifest['subdir'][version] = [] self.manifest['metadata_files'][version] = [] self.manifest['files'][version] = [] self.manifest['versionlog'][version] = [] self.set_version_date(version, date) self.po.add_bytestream_by_path(os.path.join("__"+str(version), "4=%s" % id_encode(self.item_id)), self.item_id)
def id_to_rsync(htid, **kwargs):
    """Convert an HTRC id to the Rsync location of its Extracted Features file.

    The id is split at its first '.' into a library id and a volume id. The
    returned path has the form::

        <libid>/pairtree_root/<id2path(volid)>/<encoded volid>/<libid>.<encoded volid>.json.bz2

    where the encoded volume id comes from ``id_encode``.

    Args:
        htid: Identifier of the form ``<libid>.<volid>``.
        **kwargs: Only ``kind`` is inspected. It belongs to the legacy 2.0
            URL format; passing it logs a warning and has no other effect.

    Returns:
        The '/'-separated Rsync path as a string.

    Raises:
        ValueError: If ``htid`` contains no '.', because the split cannot
            be unpacked into a library id and a volume id.
    """
    if 'kind' in kwargs:
        # logging.warn was a deprecated alias and no longer exists in
        # Python 3.13+; logging.warning is the supported spelling.
        logging.warning("The basic/advanced split with extracted features files "
                        "was removed in schema version 3.0. This function only "
                        "supports the current format for Rsync URLs, if you "
                        "would like to see the legacy 2.0 format, see Github: "
                        "https://github.com/htrc/htrc-feature-reader/blob/3e100ae"
                        "9ea45317443ae05f43a188b12afe2e69a/htrc_features/utils.py"
                        )

    libid, volid = htid.split('.', 1)
    volid_clean = id_encode(volid)
    filename = '.'.join([libid, volid_clean, 'json.bz2'])
    # Normalise any backslash separators so the result is always a
    # forward-slash path.
    path = '/'.join([libid, 'pairtree_root',
                     id2path(volid).replace('\\', '/'),
                     volid_clean, filename])
    return path
def encode(self, identifier):
    """Return the Pairtree-encoded form of ``identifier``.

    Thin wrapper that hands the identifier to ``id_encode`` and returns
    its result unchanged.
    """
    encoded = id_encode(identifier)
    return encoded