Example #1
0
def indexer_receiver(sender,
                     json=None,
                     record=None,
                     index=None,
                     **dummy_kwargs):
    """Reshape a record document right before it is indexed.

    Receiver for the ``before_record_index`` signal. ``json`` is edited in
    place and nothing is returned.

    Applied to every document:

    * ``keywords`` / ``disciplines`` lists whose first item is a string are
      rewritten as lists of ``{'keyword': ...}`` / ``{'discipline_name': ...}``
      objects.
    * ``_deposit.external_pids`` is removed; it is copied to the top level
      first when the record is not a deposit and its file metadata is public.

    Applied only when ``index`` starts with ``records``: file keys are removed
    when file metadata is not public, ``_deposit`` is dropped, and
    ``_created``, ``_updated``, ``owners`` and ``_internal`` are filled in.
    """
    # Only the first element is inspected to decide whether a list still
    # holds bare strings that need wrapping.
    for list_key, item_key in (('keywords', 'keyword'),
                               ('disciplines', 'discipline_name')):
        if list_key not in json:
            continue
        values = json[list_key]
        if len(values) > 0 and isinstance(values[0], str):
            json[list_key] = [{item_key: value} for value in values]

    deposit_meta = json['_deposit']
    if 'external_pids' in deposit_meta:
        # Promote the external PIDs to the top level only for non-deposit
        # records with public file metadata; the nested copy always goes.
        if not is_deposit(record.model) and allow_public_file_metadata(json):
            json['external_pids'] = deposit_meta['external_pids']
        del deposit_meta['external_pids']

    if not index.startswith('records'):
        return

    # Hide the file names when file metadata must not be public.
    if '_files' in json and not allow_public_file_metadata(json):
        for file_entry in json['_files']:
            del file_entry['key']

    del json['_deposit']
    json['_created'] = pytz.utc.localize(record.created).isoformat()
    json['_updated'] = pytz.utc.localize(record.updated).isoformat()
    json['owners'] = record['_deposit']['owners']
    internal = json['_internal'] = {}

    # Flag whether this record is the last child of its parent PID.
    parent_value = b2share_parent_pid_fetcher(None, record).pid_value
    own_value = b2share_record_uuid_fetcher(None, record).pid_value
    parent = RecordUUIDProvider.get(parent_value).pid
    newest = PIDVersioning(parent=parent).last_child
    internal['is_last_version'] = (newest.pid_value == own_value)

    # Store the bucket id so search results can build file links.
    buckets = RecordsBuckets.query.filter(
        RecordsBuckets.record_id == record.id).all()
    if buckets:
        internal['files_bucket_id'] = str(buckets[0].bucket_id)
Example #2
0
def indexer_receiver(sender, json=None, record=None, index=None,
                     **dummy_kwargs):
    """Prepare a record document for the search index.

    Receiver for the ``before_record_index`` signal. Documents whose target
    index name does not start with ``records`` are left untouched. For the
    others ``json`` is edited in place: file keys are removed when file
    metadata is not public, ``_deposit`` is dropped, and ``_created``,
    ``_updated``, ``owners`` and ``_internal`` are filled in.
    """
    if not index.startswith('records'):
        return

    # Hide the file names when file metadata must not be public.
    if '_files' in json and not allow_public_file_metadata(json):
        for file_entry in json['_files']:
            del file_entry['key']

    del json['_deposit']
    json['_created'] = pytz.utc.localize(record.created).isoformat()
    json['_updated'] = pytz.utc.localize(record.updated).isoformat()
    json['owners'] = record['_deposit']['owners']
    internal = json['_internal'] = {}

    # Flag whether this record is the last child of its parent PID.
    parent_value = b2share_parent_pid_fetcher(None, record).pid_value
    own_value = b2share_record_uuid_fetcher(None, record).pid_value
    versioning = PIDVersioning(
        parent=RecordUUIDProvider.get(parent_value).pid)
    newest = versioning.last_child
    internal['is_last_version'] = (newest.pid_value == own_value)

    # Store the bucket id so search results can build file links.
    buckets = RecordsBuckets.query.filter(
        RecordsBuckets.record_id == record.id).all()
    if buckets:
        internal['files_bucket_id'] = str(buckets[0].bucket_id)
Example #3
0
 def filter_internal(self, data):
     """Strip internal fields from ``data['metadata']`` and return ``data``.

     ``data`` is modified in place:

     * ``_deposit`` is removed after its ``owners`` are copied to
       ``metadata['owners']``.
     * ``_files`` is removed; it is exposed as ``data['files']`` first when
       the file metadata is public.
     * ``_pid`` is removed; the first ``ePIC_PID`` and ``DOI`` entries are
       copied to ``metadata['ePIC_PID']`` and ``metadata['DOI']`` (the DOI
       prefixed with ``DOI_URL_PREFIX``).
     * ``_oai`` and ``_internal`` are removed.
     """
     metadata = data['metadata']

     if '_deposit' in metadata:
         metadata['owners'] = metadata['_deposit']['owners']
         del metadata['_deposit']

     if '_files' in metadata:
         if allow_public_file_metadata(metadata):
             data['files'] = metadata['_files']
         del metadata['_files']

     if '_pid' in metadata:
         pid_entries = metadata['_pid']
         epic_matches = [
             entry for entry in pid_entries
             if entry.get('type') == 'ePIC_PID'
         ]
         doi_matches = [
             entry for entry in pid_entries if entry.get('type') == 'DOI'
         ]
         if epic_matches:
             metadata['ePIC_PID'] = epic_matches[0].get('value')
         if doi_matches:
             metadata['DOI'] = DOI_URL_PREFIX + doi_matches[0].get('value')
         del metadata['_pid']

     # These two are dropped without exposing anything from them.
     for hidden_key in ('_oai', '_internal'):
         metadata.pop(hidden_key, None)
     return data
Example #4
0
def indexer_receiver(sender,
                     json=None,
                     record=None,
                     index=None,
                     **dummy_kwargs):
    """Prepare a record document for the search index.

    Receiver for the ``before_record_index`` signal. Documents whose target
    index name does not start with ``records`` are left untouched. For the
    others ``json`` is edited in place: file keys are removed when file
    metadata is not public, ``_deposit`` is dropped, ``_created``,
    ``_updated`` and ``owners`` are filled in, and ``_internal`` receives the
    id of the record's first bucket when one exists.
    """
    if not index.startswith('records'):
        return

    # Hide the file names when file metadata must not be public.
    if '_files' in json and not allow_public_file_metadata(json):
        for file_entry in json['_files']:
            del file_entry['key']

    del json['_deposit']
    for stamp in ('created', 'updated'):
        moment = getattr(record, stamp)
        json['_' + stamp] = pytz.utc.localize(moment).isoformat()
    json['owners'] = record['_deposit']['owners']

    # Store the bucket id so search results can build file links.
    linked = RecordsBuckets.query.filter(
        RecordsBuckets.record_id == record.id).all()
    if linked:
        json['_internal'] = {'files_bucket_id': str(linked[0].bucket_id)}
Example #5
0
    def filter_internal(self, data):
        """Strip internal fields from ``data['metadata']`` and return ``data``.

        ``data`` is modified in place:

        * ``_deposit`` is removed after its ``owners`` are copied to
          ``metadata['owners']``.
        * ``_files`` is removed; it is exposed as ``data['files']`` first
          when the file metadata is public.
        * ``_pid`` is removed; the first ``ePIC_PID`` and ``DOI`` entries are
          copied to ``metadata['ePIC_PID']`` and ``metadata['DOI']`` (the
          DOI prefixed with ``DOI_URL_PREFIX``).
        * ``_oai`` and ``_internal`` are removed.
        """
        metadata = data['metadata']

        if '_deposit' in metadata:
            metadata['owners'] = metadata['_deposit']['owners']
            del metadata['_deposit']

        if '_files' in metadata:
            if allow_public_file_metadata(metadata):
                data['files'] = metadata['_files']
            del metadata['_files']

        if '_pid' in metadata:
            # Move the PIDs to the top level of the metadata.
            pid_entries = metadata['_pid']
            epic_matches = [entry for entry in pid_entries
                            if entry.get('type') == 'ePIC_PID']
            doi_matches = [entry for entry in pid_entries
                           if entry.get('type') == 'DOI']
            if epic_matches:
                metadata['ePIC_PID'] = epic_matches[0].get('value')
            if doi_matches:
                metadata['DOI'] = DOI_URL_PREFIX + doi_matches[0].get('value')
            # NOTE: the parent version PID is not exposed here.
            del metadata['_pid']

        # These two are dropped without exposing anything from them.
        for hidden_key in ('_oai', '_internal'):
            metadata.pop(hidden_key, None)
        return data
Example #6
0
def indexer_receiver(sender, json=None, record=None, index=None,
                     **dummy_kwargs):
    """Reshape a record document right before it is indexed.

    Receiver for the ``before_record_index`` signal. ``json`` is edited in
    place and nothing is returned.

    For every document ``_deposit.external_pids`` is removed; it is copied
    to the top level first when the record is not a deposit and its file
    metadata is public.

    Only when ``index`` starts with ``records``: file keys are removed when
    file metadata is not public, ``_deposit`` is dropped, and ``_created``,
    ``_updated``, ``owners`` and ``_internal`` are filled in.
    """
    # Imported at call time rather than at module import time.
    from b2share.modules.access.policies import allow_public_file_metadata
    from b2share.modules.records.fetchers import (
        b2share_parent_pid_fetcher,
        b2share_record_uuid_fetcher,
    )

    deposit_meta = json['_deposit']
    if 'external_pids' in deposit_meta:
        # Promote the external PIDs to the top level only for non-deposit
        # records with public file metadata; the nested copy always goes.
        if not is_deposit(record.model) and allow_public_file_metadata(json):
            json['external_pids'] = deposit_meta['external_pids']
        del deposit_meta['external_pids']

    if not index.startswith('records'):
        return

    # Hide the file names when file metadata must not be public.
    if '_files' in json and not allow_public_file_metadata(json):
        for file_entry in json['_files']:
            del file_entry['key']

    del json['_deposit']
    json['_created'] = pytz.utc.localize(record.created).isoformat()
    json['_updated'] = pytz.utc.localize(record.updated).isoformat()
    json['owners'] = record['_deposit']['owners']
    internal = json['_internal'] = {}

    # Flag whether this record is the last child of its parent PID.
    parent_value = b2share_parent_pid_fetcher(None, record).pid_value
    own_value = b2share_record_uuid_fetcher(None, record).pid_value
    versioning = PIDNodeVersioning(
        pid=RecordUUIDProvider.get(parent_value).pid)
    newest = versioning.last_child
    internal['is_last_version'] = (newest.pid_value == own_value)

    # Store the bucket id so search results can build file links.
    buckets = RecordsBuckets.query.filter(
        RecordsBuckets.record_id == record.id).all()
    if buckets:
        internal['files_bucket_id'] = str(buckets[0].bucket_id)
Example #7
0
    def filter_internal(self, data):
        """Strip internal fields from ``data['metadata']`` and return ``data``.

        ``data`` is modified in place. File information is exposed only when
        the caller may see it:

        * with ``g.record`` set (single record request): decided by the
          ``bucket-read`` permission on the record's bucket, or by
          ``allow_public_file_metadata`` when the record has no bucket;
        * with ``g.record_hit`` set (search result): decided by
          ``allow_public_file_metadata`` on the hit's ``_source``;
        * with neither set: denied. Previously this case raised
          ``UnboundLocalError`` as soon as the metadata contained ``_files``.

        ``_deposit``, ``_files``, ``_pid``, ``_oai`` and ``_internal`` are
        always removed from the metadata; ``owners``, ``external_pids``,
        ``ePIC_PID``, ``DOI`` and ``data['files']`` are derived from them.
        """
        external_pids = []
        bucket = None
        record = None
        # Deny by default so the flag is always bound, even when neither
        # request context below is available.
        user_has_permission = False
        # differentiating between search results and
        # single record requests
        if hasattr(g, 'record'):
            record = g.record
            if record.files:
                bucket = record.files.bucket
            if is_deposit(record.model):
                external_pids = generate_external_pids(record)
            # if it is a published record don't generate external pids
            # as they are immutable and stored in _deposit
            else:
                external_pids = record.model.json[
                    '_deposit'].get('external_pids')
            if bucket is None:
                user_has_permission = allow_public_file_metadata(
                    data['metadata'])
            else:
                user_has_permission = files_permission_factory(
                    bucket, 'bucket-read').can()
        elif hasattr(g, 'record_hit'):
            user_has_permission = allow_public_file_metadata(
                g.record_hit['_source'])

        if '_deposit' in data['metadata']:
            data['metadata']['owners'] = data['metadata']['_deposit']['owners']

            # Add the external_pids only if the
            # user is allowed to read the bucket
            if external_pids and bucket and user_has_permission:
                data['metadata']['external_pids'] = external_pids
            del data['metadata']['_deposit']
        if '_files' in data['metadata']:
            # Also add the files field only if the user is allowed
            if user_has_permission:
                data['files'] = data['metadata']['_files']
                if external_pids and bucket:
                    # Mark the files that have an external PID and attach it.
                    external_dict = {x['key']: x['ePIC_PID']
                                     for x in external_pids}
                    for _file in data['files']:
                        if _file['key'] in external_dict:
                            _file['b2safe'] = True
                            _file['ePIC_PID'] = external_dict[_file['key']]
            del data['metadata']['_files']
        if '_pid' in data['metadata']:
            # move PIDs to metadata top level
            epic_pids = [p for p in data['metadata']['_pid']
                         if p.get('type') == 'ePIC_PID']
            dois = [p for p in data['metadata']['_pid']
                    if p.get('type') == 'DOI']
            if epic_pids:
                data['metadata']['ePIC_PID'] = epic_pids[0].get('value')
            if dois:
                data['metadata']['DOI'] = DOI_URL_PREFIX + dois[0].get('value')
            # NOTE: the parent version PID is not exposed here.
            del data['metadata']['_pid']
        if '_oai' in data['metadata']:
            del data['metadata']['_oai']
        if '_internal' in data['metadata']:
            del data['metadata']['_internal']
        return data
Example #8
0
    def filter_internal(self, data):
        """Strip internal fields from ``data['metadata']`` and return ``data``.

        ``data`` is modified in place. File information is exposed only when
        the caller may see it:

        * with ``g.record`` set (single record request): decided by the
          ``bucket-read`` permission on the record's bucket, or by
          ``allow_public_file_metadata`` when the record has no bucket;
        * with ``g.record_hit`` set (search result): decided by
          ``allow_public_file_metadata`` on the hit's ``_source``;
        * with neither set: denied. Previously this case raised
          ``UnboundLocalError`` as soon as the metadata contained ``_files``.

        For a deposit served through ``g.record``, ``$future_doi`` is added
        when the ``AUTOMATICALLY_ASSIGN_DOI`` config value is truthy.

        ``_deposit``, ``_files``, ``_pid``, ``_oai`` and ``_internal`` are
        always removed from the metadata; ``owners``, ``external_pids``,
        ``ePIC_PID``, ``DOI`` and ``data['files']`` are derived from them.
        """
        external_pids = []
        bucket = None
        record = None
        # Deny by default so the flag is always bound, even when neither
        # request context below is available.
        user_has_permission = False
        # differentiating between search results and
        # single record requests
        if hasattr(g, 'record'):
            record = g.record
            if record.files:
                bucket = record.files.bucket
            if is_deposit(record.model):
                external_pids = generate_external_pids(record)
            # if it is a published record don't generate external pids
            # as they are immutable and stored in _deposit
            else:
                external_pids = record.model.json[
                    '_deposit'].get('external_pids')
            if bucket is None:
                user_has_permission = allow_public_file_metadata(
                    data['metadata'])
            else:
                user_has_permission = files_permission_factory(
                    bucket, 'bucket-read').can()
        elif hasattr(g, 'record_hit'):
            user_has_permission = allow_public_file_metadata(
                g.record_hit['_source'])

        if '_deposit' in data['metadata']:
            if (hasattr(g, 'record') and is_deposit(record.model)
                    and current_app.config.get(
                        'AUTOMATICALLY_ASSIGN_DOI', False)):
                # add future DOI string
                data['metadata']['$future_doi'] = generate_doi(
                    data['metadata']['_deposit']['id'])

            data['metadata']['owners'] = data['metadata']['_deposit']['owners']

            # Add the external_pids only if the
            # user is allowed to read the bucket
            if external_pids and bucket and user_has_permission:
                data['metadata']['external_pids'] = external_pids
            del data['metadata']['_deposit']
        if '_files' in data['metadata']:
            # Also add the files field only if the user is allowed
            if user_has_permission:
                data['files'] = data['metadata']['_files']
                if external_pids and bucket:
                    # Mark the files that have an external PID and attach it.
                    external_dict = {x['key']: x['ePIC_PID']
                                     for x in external_pids}
                    for _file in data['files']:
                        if _file['key'] in external_dict:
                            _file['b2safe'] = True
                            _file['ePIC_PID'] = external_dict[_file['key']]
            del data['metadata']['_files']
        if '_pid' in data['metadata']:
            # move PIDs to metadata top level
            epic_pids = [p for p in data['metadata']['_pid']
                         if p.get('type') == 'ePIC_PID']
            dois = [p for p in data['metadata']['_pid']
                    if p.get('type') == 'DOI']
            if epic_pids:
                data['metadata']['ePIC_PID'] = epic_pids[0].get('value')
            if dois:
                data['metadata']['DOI'] = DOI_URL_PREFIX + dois[0].get('value')
            # NOTE: the parent version PID is not exposed here.
            del data['metadata']['_pid']
        if '_oai' in data['metadata']:
            del data['metadata']['_oai']
        if '_internal' in data['metadata']:
            del data['metadata']['_internal']
        return data