예제 #1
0
def object_metadata(module, repo_path):
    """Return the cached application/definitions metadata, filling it if empty.

    The shared ``config.APP_METADATA`` dict is populated only while it is
    empty; once it has content, it is returned as-is and the arguments are
    not consulted.

    @param module: collection, entity, files model definitions module
    @param repo_path: Absolute path to root of object's repo
    @returns: dict (the ``config.APP_METADATA`` object itself)
    """
    if config.APP_METADATA:
        return config.APP_METADATA

    # Git / git-annex versions as reported for this repository
    repo = dvcs.repository(repo_path)
    versions = [dvcs.git_version(repo), dvcs.annex_version(repo)]
    config.APP_METADATA['git_version'] = '; '.join(versions)

    # ddr-cmdln
    config.APP_METADATA['application'] = 'https://github.com/densho/ddr-cmdln.git'
    config.APP_METADATA['app_path'] = config.INSTALL_PATH
    config.APP_METADATA['app_commit'] = dvcs.latest_commit(config.INSTALL_PATH)
    config.APP_METADATA['app_release'] = VERSION

    # ddr-defs
    defs_path = modules.Module(module).path
    config.APP_METADATA['defs_path'] = defs_path
    config.APP_METADATA['defs_commit'] = dvcs.latest_commit(defs_path)

    return config.APP_METADATA
예제 #2
0
def test_Module_is_valid():
    """Module.is_valid() returns (False, reason) per failure mode, else (True, 'ok')."""

    # empty __file__
    class TestModule0(object):
        __name__ = 'TestModule0'
        __file__ = ''

    # path is fine but there is no FIELDS attribute
    class TestModule1(object):
        __name__ = 'TestModule1'
        __file__ = 'ddr/repo_models'

    # FIELDS exists but is not a list
    class TestModule2(object):
        __name__ = 'TestModule2'
        __file__ = 'ddr/repo_models'
        FIELDS = 'not a list'

    # everything in order
    class TestModule3(object):
        __name__ = 'TestModule3'
        __file__ = 'ddr/repo_models'
        FIELDS = ['fake fields']

    cases = [
        (TestModule0, (False, "TestModule0 not in 'ddr' Repository repo.")),
        (TestModule1, (False, 'TestModule1 has no FIELDS variable.')),
        (TestModule2, (False, 'TestModule2.FIELDS is not a list.')),
        (TestModule3, (True, 'ok')),
    ]
    for fake_module_class, expected in cases:
        assert modules.Module(fake_module_class()).is_valid() == expected
예제 #3
0
def test_Module_document_commit():
    """document_commit() yields the hash from 'models_commit', or None without one."""
    module = TestModule()

    # commit exists: only the hash part of "<hash>  <timestamp>" is expected back
    with_commit = TestDocument()
    with_commit.object_metadata = {
        "models_commit":
        "20dd4e2096e6f9a9eb7c2db52907b094f41f58de  2015-10-13 17:08:43 -0700",
    }
    found = modules.Module(module).document_commit(with_commit)
    assert found == '20dd4e2096e6f9a9eb7c2db52907b094f41f58de'

    # no commit recorded in the document's metadata
    without_commit = TestDocument()
    without_commit.object_metadata = {}
    found = modules.Module(module).document_commit(without_commit)
    assert found == None
예제 #4
0
    def load_csv(self, rowd):
        """Populate File data from one row of a CSV file.

        After the row is loaded through common.load_csv, the access-file
        path and the extension-dependent path attributes are filled in.

        @param rowd: dict Headers/row cells for one line of a CSV file.
            NOTE: an 'id' key, if present, is removed from rowd in place.
        @returns: list of changed fields
        """
        # remove 'id' from rowd because files.FIELDS has no 'id' field
        # TODO files.FIELDS really should have an ID field...
        rowd.pop('id', None)
        module = modules.Module(self.identifier.fields_module())
        modified = common.load_csv(self, module, rowd)
        # fill in the blanks
        if self.access_rel:
            access_abs = os.path.join(self.entity_files_path, self.access_rel)
            if os.path.exists(access_abs):
                self.access_abs = access_abs

        # Identifier does not know file extension
        def add_extension(path, ext):
            # Append ext only when path has no extension of its own.
            # The extension found on path must not overwrite the ext
            # argument, otherwise nothing would ever be appended.
            existing_ext = os.path.splitext(path)[1]
            if existing_ext:
                return path
            return path + ext

        self.ext = os.path.splitext(self.basename_orig)[1]
        self.path_abs = add_extension(self.path_abs, self.ext)
        self.path_rel = add_extension(self.path_rel, self.ext)
        self.basename = add_extension(self.basename, self.ext)
        # fix access_rel
        # NOTE(review): assumes self.access_abs has been set (above or
        # elsewhere) by the time this line runs -- confirm.
        self.access_rel = os.path.join(os.path.dirname(self.path_rel),
                                       os.path.basename(self.access_abs))
        return modified
예제 #5
0
 def dump_csv(self, fields=[]):
     """Prepare this object's data for CSV export.

     Wraps the identifier's fields module in modules.Module and hands the
     work to common.prep_csv.

     @param fields: list Passed through to common.prep_csv unchanged.
     @returns: the result of common.prep_csv
     """
     definitions = modules.Module(self.identifier.fields_module())
     prepared = common.prep_csv(self, definitions, fields=fields)
     return prepared
예제 #6
0
 def choices(self, field_name):
     """Look up the controlled-vocab choices for the specified field, if any.

     @param field_name: str
     @returns: list or None (whatever Module.field_choices returns)
     """
     definitions = modules.Module(self.identifier.fields_module())
     return definitions.field_choices(field_name)
예제 #7
0
파일: entity.py 프로젝트: raux/ddr-cmdln
 def load_csv(self, rowd):
     """Populate Entity data from one row of a CSV file.

     Loads the row through common.load_csv, maintains the record
     timestamps and removes duplicate file entries.

     @param rowd: dict Headers/row cells for one line of a CSV file.
     @returns: list of changed fields
     """
     definitions = modules.Module(self.identifier.fields_module())
     changed = common.load_csv(self, definitions, rowd)
     # record_created is only set when the attribute is missing entirely
     if not hasattr(self, 'record_created'):
         self.record_created = datetime.now(config.TZ)
     # record_lastmod is only refreshed when something changed and the
     # attribute already exists
     if changed and hasattr(self, 'record_lastmod'):
         self.record_lastmod = datetime.now(config.TZ)
     self.rm_file_duplicates()
     return changed
예제 #8
0
def test_Module_labels_values():
    """labels_values() pairs each loaded field value with its label."""
    module = TestModule()
    document = TestDocument()
    data = [
        {'id': 'ddr-test-123'},
        {'modified': '2015-10-20T15:42:26'},
        {'title': 'labels_values'},
    ]
    # load_json is called for its effect on `document`; its return value
    # is not needed here
    models.common.load_json(document, module, json.dumps(data))
    expected = [
        {'value': value, 'label': label}
        for label, value in (
            ('Object ID', u'ddr-test-123'),
            ('Last Modified', u'2015-10-20T15:42:26'),
            ('Title', u'labels_values'),
        )
    ]
    assert modules.Module(module).labels_values(document) == expected
예제 #9
0
def test_Module_path():
    """Module.path maps a module's .pyc __file__ to the matching .py path."""
    class TestModule(object):
        pass

    fake = TestModule()
    fake.__file__ = '/var/www/media/base/ddr/repo_models/testmodule.pyc'
    expected = '/var/www/media/base/ddr/repo_models/testmodule.py'
    assert modules.Module(fake).path == expected
예제 #10
0
def test_Module_function():
    """Module.function() calls the named function on the module with the value."""
    class TestModule(object):
        def hello(self, text):
            return 'hello %s' % text

    fake = TestModule()
    fake.__file__ = 'ddr/repo_models'
    result = modules.Module(fake).function('hello', 'world')
    assert result == 'hello world'
예제 #11
0
파일: common.py 프로젝트: raux/ddr-cmdln
def object_metadata(module, repo_path):
    """Collect metadata about the application and model definitions in use.

    @param module: collection, entity, files model definitions module
    @param repo_path: Absolute path to root of object's repo
    @returns: dict with keys application, app_commit, app_release,
        defs_path, models_commit, git_version
    """
    repo = dvcs.repository(repo_path)
    versions = (dvcs.git_version(repo), dvcs.annex_version(repo))
    git_version = '; '.join(versions)

    metadata = {}
    metadata['application'] = 'https://github.com/densho/ddr-cmdln.git'
    metadata['app_commit'] = dvcs.latest_commit(config.INSTALL_PATH)
    metadata['app_release'] = VERSION
    defs_path = modules.Module(module).path
    metadata['defs_path'] = defs_path
    metadata['models_commit'] = dvcs.latest_commit(defs_path)
    metadata['git_version'] = git_version
    return metadata
예제 #12
0
 def dump_csv(self, fields=None):
     """Dump File data to list of values suitable for CSV.

     'id' is always exported, even when it was not requested. As a side
     effect, self.mimetype is filled in via get_mimetype(force=True) when
     the object has a basename but no mimetype yet.

     @param fields: list Names of fields to export (optional). The list
         passed in is not modified.
     @returns: list of values
     """
     # Work on a copy: inserting 'id' straight into the argument would
     # modify the caller's list (or a default shared between calls).
     fields = list(fields) if fields else []
     # make sure we export 'id' if it's not in model FIELDS (ahem, files)
     if 'id' not in fields:
         fields.insert(0, 'id')
     module = modules.Module(self.identifier.fields_module())
     if self.basename and not self.mimetype:
         self.mimetype = self.get_mimetype(force=True)
     return common.prep_csv(self, module, fields=fields)
예제 #13
0
파일: docstore.py 프로젝트: raux/ddr-cmdln
def _filter_fields(i, data):
    """Run index_* functions on data
    
    @param i: Identifier
    @param data: dict
    @returns: dict data
    """
    module = i.fields_module()
    for field in module.FIELDS:
        fieldname = field['name']
        # run index_* functions on field data if present
        data[fieldname] = modules.Module(module).function(
            'index_%s' % fieldname, data[fieldname])
    return data
예제 #14
0
파일: common.py 프로젝트: raux/ddr-cmdln
def load_json(document, module, json_text):
    """Populates object from JSON-formatted text; applies jsonload_{field} functions.

    Goes through module.FIELDS turning data in the JSON file into
    object attributes.  Only the first key of each item in the JSON list
    is considered; empty dicts and non-dict items are ignored.
    TODO content fields really should go into OBJECT.data OrderedDict or subobject.

    @param document: Collection/Entity/File object.
    @param module: collection/entity/file module from 'ddr' repo.
    @param json_text: JSON-formatted text
    @returns: the parsed JSON data (an error placeholder list if the text
        could not be parsed)
    """
    try:
        json_data = json.loads(json_text)
    except ValueError:
        json_data = [
            {
                'title': 'ERROR: COULD NOT READ DATA (.JSON) FILE!'
            },
            {
                '_error': 'Error: ValueError during read load_json.'
            },
        ]
    # software and commit metadata
    for field in json_data:
        if is_object_metadata(field):
            setattr(document, 'object_metadata', field)
            break
    # Reduce each dict item to its first (key, value) pair, once.
    # Empty dicts are skipped: indexing their keys would raise IndexError.
    pairs = []
    for item in json_data:
        if hasattr(item, 'keys') and item:
            key = next(iter(item))
            pairs.append((key, item[key]))
    # field values from JSON
    definitions = modules.Module(module)
    for mf in module.FIELDS:
        fieldname = mf['name']
        for key, value in pairs:
            if key != fieldname:
                continue
            # run jsonload_* functions on field data if present
            field_data = definitions.function('jsonload_%s' % fieldname, value)
            # NOTE: basestring is Python 2 only, as in the rest of this file
            if isinstance(field_data, basestring):
                field_data = field_data.strip()
            setattr(document, fieldname, field_data)
    # Fill in missing fields with default values from module.FIELDS.
    # Note: should not replace fields that are just empty.
    for mf in module.FIELDS:
        if not hasattr(document, mf['name']):
            setattr(document, mf['name'], mf.get('default', None))
    # Add timezone to fields if not present
    apply_timezone(document, module)
    return json_data
예제 #15
0
파일: common.py 프로젝트: raux/ddr-cmdln
def dump_json(obj,
              module,
              template=False,
              template_passthru=['id', 'record_created', 'record_lastmod'],
              exceptions=[]):
    """Arranges object data in list-of-dicts format before serialization.

    DDR keeps data in Git to take advantage of versioning.  Python
    dicts store data in random order which makes it impossible to
    meaningfully compare diffs of the data over time.  DDR thus stores
    data as an alphabetically arranged list of dicts, with several
    exceptions.

    The first dict in the list is not part of the object itself but
    contains metadata about the state of the DDR application at the time
    the file was last written: the Git commit of the app, the release
    number, and the versions of Git and git-annex used.
    NOTE: this function emits only the per-field dicts built from
    module.FIELDS; it does not add that metadata dict.

    Python data types that cannot be represented in JSON (e.g. datetime)
    are converted into strings.

    @param obj: Collection/Entity/File object.
    @param module: model definitions module with a FIELDS list of dicts
    @param template: Boolean True if object to be used as blank template.
    @param template_passthru: list Field names that keep the object's own
        value even in template mode.
    @param exceptions: list Field names to leave out of the output.
    @returns: list of single-item {fieldname: value} dicts
    """
    data = []
    for mf in module.FIELDS:
        fieldname = mf['name']
        field_data = ''
        # FIELDS entries are dicts, so 'form' has to be looked up as a key;
        # hasattr() on a dict never finds it and would disable this branch.
        form = mf.get('form')
        if template and (fieldname not in template_passthru) and form:
            # write default values
            field_data = form.get('initial', '')
        elif hasattr(obj, fieldname):
            # run jsondump_* functions on field data if present
            field_data = modules.Module(module).function(
                'jsondump_%s' % fieldname, getattr(obj, fieldname))
        if fieldname not in exceptions:
            data.append({fieldname: field_data})
    return data
예제 #16
0
def test_Module_cmp_model_definition_fields():
    """cmp_model_definition_fields() reports fields added to/removed from the JSON."""
    module = TestModule()
    module.FIELDS = [{'name': name} for name in ('id', 'modified', 'title')]
    m = modules.Module(module)
    data = [
        {},  # object_metadata
        {'id': 'ddr-test-123'},
        {'modified': '2015-10-20T15:42:26'},
        {'title': 'labels_values'},
    ]

    # JSON matches the definitions exactly
    unchanged = m.cmp_model_definition_fields(json.dumps(data))
    # JSON carries one field the definitions do not know
    with_extra = m.cmp_model_definition_fields(
        json.dumps(data + [{'new': 'new field'}]))
    # JSON lacks the 'title' field
    without_title = m.cmp_model_definition_fields(json.dumps(data[:-1]))

    assert unchanged == {'removed': [], 'added': []}
    assert with_extra == {'removed': [], 'added': ['new']}
    assert without_title == {'removed': ['title'], 'added': []}
예제 #17
0
파일: common.py 프로젝트: raux/ddr-cmdln
def form_prep(document, module):
    """Build the data dict handed to a DDRForm, applying formprep_{field} functions.

    Only fields that the document actually has as an attribute and whose
    definition carries a truthy 'form' entry are included.  Each value is
    passed through Module.function('formprep_<name>', value).

    @param document: Collection, Entity, File document object
    @param module: collection, entity, files model definitions module
    @returns data: dict object as used by Django Form object.
    """
    prepared = {}
    for spec in module.FIELDS:
        name = spec['name']
        if not (hasattr(document, name) and spec.get('form', None)):
            continue
        # run formprep_* functions on field data if present
        prepared[name] = modules.Module(module).function(
            'formprep_%s' % name, getattr(document, name))
    return prepared
예제 #18
0
파일: common.py 프로젝트: raux/ddr-cmdln
def form_post(document, module, cleaned_data):
    """Copy cleaned form data onto the document, applying formpost_{field} functions.

    Only fields that the document already has as an attribute and whose
    definition carries a truthy 'form' entry are updated.  Each value is
    passed through Module.function('formpost_<name>', value).  Afterwards
    record_lastmod is refreshed if the document has that attribute.
    NOTE: cleaned_data must contain items for all such fields.

    @param document: Collection, Entity, File document object
    @param module: collection, entity, files model definitions module
    @param cleaned_data: dict cleaned_data from DDRForm
    """
    for spec in module.FIELDS:
        name = spec['name']
        if not (hasattr(document, name) and spec.get('form', None)):
            continue
        # run formpost_* functions on field data if present
        processed = modules.Module(module).function(
            'formpost_%s' % name, cleaned_data[name])
        setattr(document, name, processed)
    # update record_lastmod
    if hasattr(document, 'record_lastmod'):
        document.record_lastmod = datetime.now(config.TZ)
예제 #19
0
파일: common.py 프로젝트: raux/ddr-cmdln
 def labels_values(self):
     """Return this object's label/value data for the UI.

     Delegates to Module.labels_values using the identifier's fields module.
     """
     definitions = modules.Module(self.identifier.fields_module())
     return definitions.labels_values(self)
예제 #20
0
    def post(self,
             document,
             public_fields=[],
             additional_fields={},
             parents={},
             force=False):
        """Add a new document to an index or update an existing one.

        Unless `force` is set, the document is first checked with
        _publishable(); a document that fails the check is not saved and a
        403 status dict is returned instead.

        The document is copied field by field into an instance of the
        Elasticsearch class registered for its model.  Fields that have an
        index_{field} function in the fields module are run through it.
        Falsy field values are not copied.
        [IMPORTANT: documents MUST contain an 'id' field!]

        curl -XPUT 'http://localhost:9200/ddr/collection/ddr-testing-141' -d '{ ... }'

        @param document: Collection,Entity,File The object to post.
        @param public_fields: list (not used by this method)
        @param additional_fields: dict (not used by this method)
        @param parents: dict Basic metadata for parent documents.
        @param force: boolean Bypass status and public checks.
        @returns: JSON dict with status code and response
        """
        logger.debug('post(%s, %s, %s)' % (self.indexname, document, force))

        if not force:
            if not parents:
                parents = _parents_status([document.identifier.path_abs()])
            allowed = _publishable([document.identifier.path_abs()], parents)
            if not allowed:
                return {'status': 403, 'response': 'object not publishable'}

        # instantiate appropriate subclass of ESObject / DocType
        # TODO Devil's advocate: why are we doing this? We already have the object.
        ES_Class = ELASTICSEARCH_CLASSES_BY_MODEL[document.identifier.model]
        d = ES_Class()
        fields_module = document.identifier.fields_module()
        d.meta.id = document.identifier.id
        for fieldname in doctype_fields(ES_Class):
            indexer = 'index_%s' % fieldname
            if hasattr(fields_module, indexer):
                # index_* for complex fields
                field_data = modules.Module(fields_module).function(
                    indexer,
                    getattr(document, fieldname),
                )
            else:
                # everything else; a missing attribute counts as no data
                field_data = getattr(document, fieldname, None)
            if field_data:
                setattr(d, fieldname, field_data)

        # Add parts of id (e.g. repo, org, cid) to document as separate fields.
        for part in ('repo', 'org', 'cid', 'eid', 'sid', 'role', 'sha1'):
            setattr(d, part, document.identifier.parts.get(part, ''))

        d.collection_id = document.identifier.collection_id()
        below_collection = d.collection_id and (
            d.collection_id != document.identifier.id)
        if below_collection:
            # we don't want file-role (a stub) as parent
            d.parent_id = document.identifier.parent_id(stubs=0)
        else:
            # but we do want repository,organization (both stubs)
            d.parent_id = document.identifier.parent_id(stubs=1)

        logger.debug('saving')
        status = d.save(using=self.es, index=self.indexname)
        logger.debug(str(status))
        return status
예제 #21
0
def test_Module_parse_commit():
    """_parse_commit() extracts the hash from a "<hash>  <timestamp>" string."""
    wrapper = modules.Module(TestModule())
    text = '95a3a0ed3232990ee8fbbc3065a11316bccd0b35  2015-03-26 15:49:58 -0700'
    parsed = wrapper._parse_commit(text)
    assert parsed == '95a3a0ed3232990ee8fbbc3065a11316bccd0b35'
예제 #22
0
파일: common.py 프로젝트: raux/ddr-cmdln
    def to_esobject(self, public_fields=[], public=True):
        """Returns an Elasticsearch DSL version of the object

        Builds an instance of the ES class registered for this object's
        model and copies over identifier-derived values, links,
        title/description, lineage and the model-specific fields.

        @param public_fields: list Names of fields that may be exposed when
            `public` is set.  If empty, the names are taken from the FIELDS
            entries whose ['elasticsearch']['public'] value is truthy.
        @param public: boolean Leave out fields not in public_fields.
        @returns: subclass of repo_models.elastic.ESObject
        """
        # instantiate appropriate subclass of ESObject / DocType
        # TODO Devil's advocate: why are we doing this? We already have the object.
        ES_Class = ELASTICSEARCH_CLASSES_BY_MODEL[self.identifier.model]
        fields_module = self.identifier.fields_module()
        if not public_fields:
            public_fields = [
                f['name'] for f in fields_module.FIELDS
                if f['elasticsearch']['public']
            ]

        img_path = ''
        if hasattr(self, 'mimetype') and (self.mimetype == 'text/html'):
            # TODO knows too much!!!
            img_path = os.path.join(
                self.identifier.collection_id(),
                '%s.htm' % self.id,
            )
        elif hasattr(self, 'access_rel'):
            img_path = os.path.join(
                self.identifier.collection_id(),
                os.path.basename(self.access_rel),
            )
        elif self.signature_id:
            img_path = os.path.join(
                self.identifier.collection_id(),
                access_filename(self.signature_id),
            )

        download_path = ''
        if self.identifier.model in ['file']:
            download_path = os.path.join(
                self.identifier.collection_id(),
                '%s%s' % (self.id, self.ext),
            )

        d = ES_Class()
        d.meta.id = self.identifier.id
        d.id = self.identifier.id
        d.model = self.identifier.model
        if self.identifier.collection_id() != self.identifier.id:
            # we don't want file-role (a stub) as parent
            d.parent_id = self.identifier.parent_id(stubs=0)
        else:
            # but we do want repository,organization (both stubs)
            d.parent_id = self.identifier.parent_id(stubs=1)
        d.organization_id = self.identifier.organization_id()
        d.collection_id = self.identifier.collection_id()
        d.signature_id = self.signature_id
        if hasattr(self, 'ddrpublic_template_key'):
            # only the second element of the returned pair is used here
            _, template_key = self.ddrpublic_template_key()
            if template_key:
                d.template = template_key
        # ID components (repo, org, cid, ...) as separate fields
        idparts = deepcopy(self.identifier.idparts)
        idparts.pop('model')
        for k in ID_COMPONENTS:
            setattr(d, k, '')  # ensure all fields present
        # items() rather than iteritems(): works on Python 2 and 3 alike
        for k, v in idparts.items():
            setattr(d, k, v)
        # links
        d.links_html = self.identifier.id
        d.links_json = self.identifier.id
        d.links_parent = self.identifier.parent_id(stubs=True)
        d.links_children = self.identifier.id
        d.links_img = img_path
        d.links_thumb = img_path
        # title,description
        if hasattr(self, 'title'):
            d.title = self.title
        else:
            d.title = self.label
        if hasattr(self, 'description'):
            d.description = self.description
        else:
            d.description = ''
        # breadcrumbs
        lineage = []
        for i in self.identifier.lineage(stubs=0):
            # last element of the last MODELS_IDPARTS entry for the model
            idpart = MODELS_IDPARTS[i.model][-1][-1]
            lineage.append({
                'id': i.id,
                'model': i.model,
                'idpart': str(idpart),
                'label': str(i.idparts[idpart]),
            })
        d.lineage = lineage
        # module-specific fields
        if hasattr(ES_Class, 'list_fields'):
            setattr(d, '_fields', ES_Class.list_fields())
        # module-specific fields
        for fieldname in docstore.doctype_fields(ES_Class):
            # hide non-public fields if this is public
            if public and (fieldname not in public_fields):
                continue
            # complex fields use repo_models.MODEL.index_FIELD if present
            if hasattr(fields_module, 'index_%s' % fieldname):
                field_data = modules.Module(fields_module).function(
                    'index_%s' % fieldname,
                    getattr(self, fieldname),
                )
            else:
                # a missing attribute counts as no data
                field_data = getattr(self, fieldname, None)
            if field_data:
                setattr(d, fieldname, field_data)
        # "special" fields
        if self.identifier.model in ['entity', 'segment']:
            # TODO find a way to search on creators.id
            # narrator_id: creators that cannot be indexed by 'id' or have
            # no 'id' are skipped; the last creator with an id wins
            for c in self.creators:
                try:
                    d.narrator_id = c['id']
                except (KeyError, TypeError):
                    pass
            # topics & facility are too hard to search as nested objects
            # so attach extra 'topics_id' and 'facility_id' fields
            d.topics_id = [item['id'] for item in self.topics]
            d.facility_id = [item['id'] for item in self.facility]
        if self.identifier.model in ['segment']:
            d.ia_meta = archivedotorg.download_segment_meta(self.identifier.id)
        if self.identifier.model in ['file']:
            if download_path:
                d.links_download = download_path
        return d