def __init__(self, path_abs, id=None, identifier=None):
    """Set identity and path attributes from an Identifier.

    @param path_abs: str Absolute path to this object; normalized with os.path.normpath.
    @param id: Not used by this method; kept in the signature for existing callers.
    @param identifier: Optional Identifier; when missing (or falsy) one is built from path_abs.
    """
    path_abs = os.path.normpath(path_abs)
    ident = identifier if identifier else Identifier(path=path_abs)

    # Identity
    self.identifier = ident
    self.id = ident.id
    self.idparts = list(ident.parts.values())
    self.collection_id = ident.collection_id()
    self.parent_id = ident.parent_id()

    # Absolute locations
    self.path_abs = self.path = path_abs
    self.collection_path = ident.collection_path()
    self.parent_path = ident.parent_path()
    self.root = os.path.dirname(self.parent_path)
    # Sets json_path, changelog_path, control_path, mets_path, lock_path, files_path
    for kind in ('json', 'changelog', 'control', 'mets', 'lock', 'files'):
        setattr(self, f'{kind}_path', ident.path_abs(kind))

    # Relative locations
    self.path_rel = ident.path_rel()
    # Sets json_path_rel, changelog_path_rel, control_path_rel, mets_path_rel, files_path_rel
    for kind in ('json', 'changelog', 'control', 'mets', 'files'):
        setattr(self, f'{kind}_path_rel', ident.path_rel(kind))
def __init__(self, path_abs, id=None, identifier=None):
    """Populate identity and path attributes from an Identifier.

    @param path_abs: str Absolute path to this object; normalized with os.path.normpath.
    @param id: Not used by this method; kept in the signature for existing callers.
    @param identifier: Optional Identifier; when missing (or falsy) one is built from path_abs.
    """
    path_abs = os.path.normpath(path_abs)
    if identifier:
        i = identifier
    else:
        i = Identifier(path=path_abs)
    self.identifier = i
    self.id = i.id
    # Materialize as a list: on Python 3 dict.values() is a live view that
    # cannot be indexed or serialized.
    self.idparts = list(i.parts.values())

    self.collection_id = i.collection_id()
    self.parent_id = i.parent_id()

    self.path_abs = path_abs
    self.path = path_abs
    self.collection_path = i.collection_path()
    self.parent_path = i.parent_path()
    self.root = os.path.dirname(self.parent_path)
    self.json_path = i.path_abs('json')
    self.changelog_path = i.path_abs('changelog')
    self.control_path = i.path_abs('control')
    self.mets_path = i.path_abs('mets')
    self.lock_path = i.path_abs('lock')
    self.files_path = i.path_abs('files')

    self.path_rel = i.path_rel()
    self.json_path_rel = i.path_rel('json')
    self.changelog_path_rel = i.path_rel('changelog')
    self.control_path_rel = i.path_rel('control')
    self.mets_path_rel = i.path_rel('mets')
    self.files_path_rel = i.path_rel('files')
def __init__(self, path_abs, id=None, identifier=None):
    """Populate identity, path and Git URL attributes from an Identifier.

    >>> c = Collection('/tmp/ddr-testing-123')
    >>> c.id
    'ddr-testing-123'
    >>> c.ead_path_rel
    'ead.xml'
    >>> c.ead_path
    '/tmp/ddr-testing-123/ead.xml'
    >>> c.json_path_rel
    'collection.json'
    >>> c.json_path
    '/tmp/ddr-testing-123/collection.json'

    @param path_abs: str Absolute path to the collection; normalized with os.path.normpath.
    @param id: Not used by this method; kept in the signature for existing callers.
    @param identifier: Optional Identifier; when missing (or falsy) one is built from path_abs.
    """
    path_abs = os.path.normpath(path_abs)
    if identifier:
        i = identifier
    else:
        i = Identifier(path=path_abs)
    self.identifier = i
    self.id = i.id
    # Materialize as a list: on Python 3 dict.values() is a live view that
    # cannot be indexed or serialized.
    self.idparts = list(i.parts.values())

    self.path_abs = path_abs
    self.path = path_abs

    # Directory that contains the collection directory.
    self.root = os.path.split(self.path)[0]
    self.json_path = i.path_abs('json')
    self.git_path = i.path_abs('git')
    self.gitignore_path = i.path_abs('gitignore')
    self.annex_path = i.path_abs('annex')
    self.changelog_path = i.path_abs('changelog')
    self.control_path = i.path_abs('control')
    self.ead_path = i.path_abs('ead')
    self.lock_path = i.path_abs('lock')
    self.files_path = i.path_abs('files')

    self.path_rel = i.path_rel()
    self.json_path_rel = i.path_rel('json')
    self.git_path_rel = i.path_rel('git')
    self.gitignore_path_rel = i.path_rel('gitignore')
    self.annex_path_rel = i.path_rel('annex')
    self.changelog_path_rel = i.path_rel('changelog')
    self.control_path_rel = i.path_rel('control')
    self.ead_path_rel = i.path_rel('ead')
    self.files_path_rel = i.path_rel('files')

    # "<config.GITOLITE>:<collection id>"
    self.git_url = '{}:{}'.format(config.GITOLITE, self.id)
def delete(self, document_id, recursive=False):
    """Delete a document and optionally its children.

    TODO refactor after upgrading Elasticsearch past 2.4.
    delete_by_query was removed sometime during elasticsearch-py 2.*
    I think it was added back in a later version so the code stays for now.

    For now, instead of deleting by query, we start with document_id,
    find all metadata paths beneath it in the filesystem (when recursive),
    and delete each individual document with self.es.delete.
    One progress line is printed per document.

    @param document_id: ID of the document to delete; resolved against config.MEDIA_BASE.
    @param recursive: True or False
    """
    # Lazy logging args: the message is only formatted if DEBUG is enabled.
    logger.debug('delete(%s, %s)', document_id, recursive)
    root = Identifier(document_id, config.MEDIA_BASE)
    if recursive:
        paths = util.find_meta_files(
            root.path_abs(), recursive=recursive, files_first=1
        )
    else:
        paths = [root.path_abs()]
    identifiers = [Identifier(path) for path in paths]
    num = len(identifiers)
    # NOTE(review): the progress counter is 0-based, so the last line reads
    # "<num-1>/<num>"; left as-is to keep the printed output unchanged.
    for n, oi in enumerate(identifiers):
        # TODO hard-coded models here!
        # Segments are deleted from the index used for 'entity'.
        model = 'entity' if oi.model == 'segment' else oi.model
        index = self.index_name(model)
        try:
            result = self.es.delete(index=index, id=oi.id)
        except docstore.NotFoundError:
            # A missing document is reported, not treated as fatal.
            print(f'{n}/{num} DELETE {index} {oi.id} -> 404 Not Found')
        else:
            print(f'{n}/{num} DELETE {index} {oi.id} -> {result["result"]}')
def signature_abs(obj, basepath):
    """Absolute path to signature image file, if signature_id present.

    Expects obj.signature_id to be either a valid file ID or a special
    interview signature image (ex. "denshovh-aart-03", "denshovh-hlarry_g-02")

    @param obj: dict with a 'signature_id' key, or object with a signature_id attribute
    @param basepath: str Passed to Identifier along with the signature ID
    @returns: str absolute path to signature img, or None
    """
    if isinstance(obj, dict):
        sid = obj.get('signature_id')
    else:
        sid = getattr(obj, 'signature_id', None)
    if not sid:
        return None
    # ignore interview signature ID
    if INTERVIEW_SIG_REGEX.match(sid):
        return None
    try:
        oi = Identifier(sid, basepath)
    except Exception:
        # Best effort: an ID that cannot be turned into an Identifier has no
        # signature file. Catch Exception rather than everything so that
        # KeyboardInterrupt/SystemExit still propagate.
        return None
    if oi and oi.model == 'file':
        return oi.path_abs('access')
    return None