Code example #1
File: entity.py  Project: denshoproject/ddr-cmdln
    def __init__(self, path_abs, id=None, identifier=None):
        path_abs = os.path.normpath(path_abs)
        if identifier:
            i = identifier
        else:
            i = Identifier(path=path_abs)
        self.identifier = i

        self.id = i.id
        self.idparts = list(i.parts.values())

        self.collection_id = i.collection_id()
        self.parent_id = i.parent_id()

        self.path_abs = path_abs
        self.path = path_abs
        self.collection_path = i.collection_path()
        self.parent_path = i.parent_path()

        self.root = os.path.dirname(self.parent_path)
        self.json_path = i.path_abs('json')
        self.changelog_path = i.path_abs('changelog')
        self.control_path = i.path_abs('control')
        self.mets_path = i.path_abs('mets')
        self.lock_path = i.path_abs('lock')
        self.files_path = i.path_abs('files')

        self.path_rel = i.path_rel()
        self.json_path_rel = i.path_rel('json')
        self.changelog_path_rel = i.path_rel('changelog')
        self.control_path_rel = i.path_rel('control')
        self.mets_path_rel = i.path_rel('mets')
        self.files_path_rel = i.path_rel('files')
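
A minimal usage sketch for the constructor above. It assumes the enclosing class is Entity, that Entity and Identifier are importable from the DDR package, and a DDR-style entity path; none of these details appear in the listing itself, so treat them as assumptions.

# Usage sketch; module paths, class name, and the example path are assumptions.
from DDR.identifier import Identifier
from DDR.models import Entity

path = '/var/www/media/ddr/ddr-densho-10/files/ddr-densho-10-1'
entity = Entity(path)                                  # builds an Identifier from the path
same = Entity(path, identifier=Identifier(path=path))  # or reuse an existing Identifier
print(entity.id, entity.collection_id, entity.json_path_rel)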
Code example #2
File: entity.py  Project: densho/ddr-cmdln
 def __init__( self, path_abs, id=None, identifier=None ):
     path_abs = os.path.normpath(path_abs)
     if identifier:
         i = identifier
     else:
         i = Identifier(path=path_abs)
     self.identifier = i
     
     self.id = i.id
     self.idparts = i.parts.values()
     
     self.collection_id = i.collection_id()
     self.parent_id = i.parent_id()
     
     self.path_abs = path_abs
     self.path = path_abs
     self.collection_path = i.collection_path()
     self.parent_path = i.parent_path()
     
     self.root = os.path.dirname(self.parent_path)
     self.json_path = i.path_abs('json')
     self.changelog_path = i.path_abs('changelog')
     self.control_path = i.path_abs('control')
     self.mets_path = i.path_abs('mets')
     self.lock_path = i.path_abs('lock')
     self.files_path = i.path_abs('files')
     
     self.path_rel = i.path_rel()
     self.json_path_rel = i.path_rel('json')
     self.changelog_path_rel = i.path_rel('changelog')
     self.control_path_rel = i.path_rel('control')
     self.mets_path_rel = i.path_rel('mets')
     self.files_path_rel = i.path_rel('files')
Code example #3
File: docstore.py  Project: raux/ddr-cmdln
 def _repo_org(self, path, doctype, remove=False):
     """
     seealso DDR.models.common.DDRObject.to_esobject
     """
     # get and validate file
     data = load_json(path)
     if (not (data.get('id') and data.get('repo'))):
         raise Exception('Data file is not well-formed.')
     oi = Identifier(id=data['id'])
     d = OrderedDict()
     d['id'] = oi.id
     d['model'] = oi.model
     d['parent_id'] = oi.parent_id(stubs=1)
     # links
     d['links_html'] = oi.id
     d['links_json'] = oi.id
     d['links_img'] = '%s/logo.png' % oi.id
     d['links_thumb'] = '%s/logo.png' % oi.id
     d['links_parent'] = oi.parent_id(stubs=1)
     d['links_children'] = oi.id
     # title,description
     d['title'] = data['title']
     d['description'] = data['description']
     d['url'] = data['url']
     # ID components (repo, org, cid, ...) as separate fields
     idparts = deepcopy(oi.idparts)
     idparts.pop('model')
     for k in ID_COMPONENTS:
         d[k] = ''  # ensure all fields present
     for k, v in idparts.iteritems():
         d[k] = v
     # add/update
     if remove and self.exists(doctype, oi):
         results = self.es.delete(index=self.indexname,
                                  doc_type=doctype,
                                  id=oi.id)
     else:
         results = self.es.index(index=self.indexname,
                                 doc_type=doctype,
                                 id=oi.id,
                                 body=d)
     return results
Code example #4
File: docstore.py  Project: densho/ddr-cmdln
 def _repo_org(self, path, doctype, remove=False):
     """
     seealso DDR.models.common.DDRObject.to_esobject
     """
     # get and validate file
     data = load_json(path)
     if (not (data.get('id') and data.get('repo'))):
         raise Exception('Data file is not well-formed.')
     oi = Identifier(id=data['id'])
     d = OrderedDict()
     d['id'] = oi.id
     d['model'] = oi.model
     d['parent_id'] = oi.parent_id(stubs=1)
     # links
     d['links_html'] = oi.id
     d['links_json'] = oi.id
     d['links_img'] = '%s/logo.png' % oi.id
     d['links_thumb'] = '%s/logo.png' % oi.id
     d['links_parent'] = oi.parent_id(stubs=1)
     d['links_children'] = oi.id
     # title,description
     d['title'] = data['title']
     d['description'] = data['description']
     d['url'] = data['url']
     # ID components (repo, org, cid, ...) as separate fields
     idparts = deepcopy(oi.idparts)
     idparts.pop('model')
     for k in ID_COMPONENTS:
         d[k] = '' # ensure all fields present
     for k,v in idparts.iteritems():
         d[k] = v
     # add/update
     if remove and self.exists(doctype, oi):
         results = self.es.delete(
             index=self.indexname, doc_type=doctype, id=oi.id
         )
     else:
         results = self.es.index(
             index=self.indexname, doc_type=doctype, id=oi.id, body=d
         )
     return results
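
For reference, a hypothetical sketch of the body that examples #3 and #4 assemble for an organization record. The field names mirror the code above; the concrete values, including the 'ddr-densho' ID and its 'ddr' parent, are invented for illustration.

# Illustrative only -- values are hypothetical, field names come from the code above.
d = {
    'id': 'ddr-densho',
    'model': 'organization',
    'parent_id': 'ddr',
    'links_html': 'ddr-densho',
    'links_json': 'ddr-densho',
    'links_img': 'ddr-densho/logo.png',
    'links_thumb': 'ddr-densho/logo.png',
    'links_parent': 'ddr',
    'links_children': 'ddr-densho',
    'title': 'Example organization title',
    'description': 'Example description.',
    'url': 'https://example.org/',
    # ID components listed in ID_COMPONENTS, e.g.:
    'repo': 'ddr',
    'org': 'densho',
}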
Code example #5
File: docstore.py  Project: denshoproject/ddr-cmdln
    def _repo_org(self, path, doctype, remove=False):
        """
        seealso DDR.models.common.DDRObject.to_esobject
        """
        # get and validate file
        data = load_json(path)
        if (not (data.get('id') and data.get('repo'))):
            raise Exception('Data file is not well-formed.')
        oi = Identifier(id=data['id'])

        ES_Class = ELASTICSEARCH_CLASSES_BY_MODEL[doctype]
        d = ES_Class(id=oi.id)
        d.meta.id = oi.id
        d.model = oi.model
        d.parent_id = oi.parent_id(stubs=1)
        # links
        d.links_html = oi.id
        d.links_json = oi.id
        d.links_img = '%s/logo.png' % oi.id
        d.links_thumb = '%s/logo.png' % oi.id
        d.links_parent = oi.parent_id(stubs=1)
        d.links_children = oi.id
        # title,description
        d.title = data['title']
        d.description = data['description']
        d.url = data['url']
        # ID components (repo, org, cid, ...) as separate fields
        idparts = deepcopy(oi.idparts)
        idparts.pop('model')
        for key, val in idparts.items():
            setattr(d, key, val)
        # add/update
        if remove and self.exists(doctype, oi):
            results = d.delete(index=self.index_name(doctype), using=self.es)
        else:
            results = d.save(index=self.index_name(doctype), using=self.es)
        return results
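
Example #5 swaps the raw dict for an elasticsearch_dsl Document looked up via ELASTICSEARCH_CLASSES_BY_MODEL. A self-contained sketch of that pattern follows; the Repository class, its fields, the index name, and the host are assumptions for illustration, not the project's actual classes.

# Sketch of the elasticsearch_dsl pattern used above; names and values are assumptions.
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Document, Keyword, Text

class Repository(Document):          # stand-in for ELASTICSEARCH_CLASSES_BY_MODEL[doctype]
    model = Keyword()
    parent_id = Keyword()
    title = Text()
    description = Text()
    url = Keyword()

es = Elasticsearch(['http://localhost:9200'])
doc = Repository(meta={'id': 'ddr-densho'})   # document ID, like d.meta.id above
doc.model = 'organization'
doc.title = 'Example organization title'
doc.save(index='ddrpublic-organization', using=es)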
Code example #6
File: docstore.py  Project: gjost/ddr-cmdln-bkup
def index( hosts, index, path, recursive=False, public=True ):
    """(Re)index with data from the specified directory.
    
    After receiving a list of metadata files, index() iterates through the list several times.  The first pass weeds out paths to objects that can not be published (e.g. object or its parent is unpublished).
    
    The second pass goes through the files and assigns a signature file to each entity or collection ID.
    There is some logic that tries to pick the first file of the first entity to be the collection signature, and so on.  Mezzanine files are preferred over master files.
    
    In the final pass, a list of public/publishable fields is chosen based on the model.  Additional fields not in the model (e.g. parent ID, parent organization/collection/entity ID, the signature file) are packaged.  Then everything is sent off to post().

    @param hosts: list of dicts containing host information.
    @param index: Name of the target index.
    @param path: Absolute path to directory containing object metadata files.
    @param recursive: Whether or not to recurse into subdirectories.
    @param public: For publication (fields not marked public will be omitted).
    @returns: dict with the total number of paths, the number indexed successfully, and a list of paths that didn't work out
    """
    logger.debug('index(%s, %s, %s)' % (hosts, index, path))
    
    publicfields = public_fields()
    
    # process a single file if requested
    if os.path.isfile(path):
        paths = [path]
    else:
        # files listed first, then entities, then collections
        paths = util.find_meta_files(path, recursive, files_first=1)
    
    # Store value of public,status for each collection,entity.
    # Values will be used by entities and files to inherit these values from their parent.
    parents = _parents_status(paths)
    
    # Determine if paths are publishable or not
    successful_paths,bad_paths = _publishable_or_not(paths, parents)
    
    # iterate through paths, storing signature_url for each collection, entity
    # paths listed files first, then entities, then collections
    signature_files = _choose_signatures(successful_paths)
    print('Signature files')
    keys = signature_files.keys()
    keys.sort()
    for key in keys:
        print(key, signature_files[key])
    
    successful = 0
    for path in successful_paths:
        identifier = Identifier(path=path)
        parent_id = identifier.parent_id()
        
        document_pub_fields = []
        if public and identifier.model:
            document_pub_fields = publicfields[identifier.model]
        
        additional_fields = {'parent_id': parent_id}
        if identifier.model == 'collection': additional_fields['organization_id'] = parent_id
        if identifier.model == 'entity': additional_fields['collection_id'] = parent_id
        if identifier.model == 'file': additional_fields['entity_id'] = parent_id
        if identifier.model in ['collection', 'entity']:
            additional_fields['signature_file'] = signature_files.get(identifier.id, '')
        
        # HERE WE GO!
        document = load_document_json(path, identifier.model, identifier.id)
        try:
            existing = get(hosts, index, identifier.model, identifier.id, fields=[])
        except:
            existing = None
        result = post(hosts, index, document, document_pub_fields, additional_fields)
        # success: created, or version number incremented
        if result.get('_id', None):
            if existing:
                existing_version = existing.get('version', None)
                if not existing_version:
                    existing_version = existing.get('_version', None)
            else:
                existing_version = None
            result_version = result.get('version', None)
            if not result_version:
                result_version = result.get('_version', None)
            if result['created'] or (existing_version and (result_version > existing_version)):
                successful += 1
        else:
            bad_paths.append((path, result['status'], result['response']))
            #print(status_code)
    logger.debug('INDEXING COMPLETED')
    return {'total':len(paths), 'successful':successful, 'bad':bad_paths}
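
A hypothetical call sketch for index(). Only the signature and the returned dict keys come from the code above; the host, index name, and collection path are invented for illustration.

# Hypothetical values; hosts is "a list of dicts containing host information" per the docstring.
hosts = [{'host': '127.0.0.1', 'port': 9200}]
report = index(hosts, 'ddrpublic-dev', '/var/www/media/ddr/ddr-densho-10', recursive=True)
print(report['total'], report['successful'], len(report['bad']))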