Ejemplo n.º 1
0
def test_import_records(mock_record_by_identifier, app, document_json,
                        bucket_location):
    """Exercise ``import_records`` for creation, update and lookup failure."""
    document_json['files'] = [{
        'key': 'test.pdf',
        'url': 'http://some.url/file.pdf'
    }]

    # First pass: the mocked identifier lookup returns nothing.
    mock_record_by_identifier.return_value = None
    created_ids = import_records([document_json])
    imported = DocumentRecord.get_record(created_ids[0])
    assert imported
    assert imported['harvested']

    # Second pass: the mocked lookup now returns the record imported above.
    mock_record_by_identifier.return_value = imported
    updated_ids = import_records([document_json])
    assert DocumentRecord.get_record(updated_ids[0])

    # Third pass: the mocked lookup raises, and no identifier must come back.
    def raise_lookup_error(data):
        raise Exception("No record found for identifier")

    mock_record_by_identifier.side_effect = raise_lookup_error

    assert not import_records([document_json])
Ejemplo n.º 2
0
def populate_fulltext_field(sender=None,
                            record=None,
                            json=None,
                            index=None,
                            **kwargs):
    """Receive a signal before record is indexed, to add fulltext.

    This function is called just before a record is sent to index. When the
    target index is a documents index and the record has files, ``json`` is
    modified in place: a ``fulltext`` key is set to the list of UTF-8 decoded
    contents of every file whose ``type`` is ``'fulltext'``.

    :param sender: Sender of the signal.
    :param Record record: Record to index.
    :param dict json: JSON that will be indexed.
    :param str index: Name of the index in which record will be sent.
    :returns: None. Nothing is done when ``index`` is missing or does not
        start with ``'documents'``.
    """
    # Takes care only about documents indexing. ``index`` defaults to None,
    # so a missing index is ignored instead of failing on ``startswith``.
    if not index or not index.startswith('documents'):
        return

    # Transform record in DocumentRecord
    if not isinstance(record, DocumentRecord):
        record = DocumentRecord.get_record(record.id)

    # No files are present in record
    if not record.files:
        return

    # Store fulltext in array for indexing
    fulltext = []
    for record_file in record.files:
        if record_file.get('type') == 'fulltext':
            with record_file.file.storage().open() as stream:
                fulltext.append(stream.read().decode('utf-8'))
    json['fulltext'] = fulltext
Ejemplo n.º 3
0
def enrich_document_data(sender=None,
                         record=None,
                         json=None,
                         index=None,
                         **kwargs):
    """Receive a signal before record is indexed, to enrich indexed data.

    This function is called just before a record is sent to index. When the
    target index is a documents index, ``json`` is modified in place:

    * ``isOpenAccess`` is set from ``record.is_open_access()``;
    * if ``organisation`` is present, an ``ips`` list is stored on its first
      entry, built from the newline-separated ``allowedIps`` value (empty
      list when ``allowedIps`` is missing or empty);
    * if the record has files, ``fulltext`` is set to the list of UTF-8
      decoded contents of every file whose ``type`` is ``'fulltext'``.

    :param sender: Sender of the signal.
    :param Record record: Record to index.
    :param dict json: JSON that will be indexed.
    :param str index: Name of the index in which record will be sent.
    :returns: None. Nothing is done when ``index`` is missing or does not
        start with ``'documents'``.
    """
    # Takes care only about documents indexing. ``index`` defaults to None,
    # so a missing index is ignored instead of failing on ``startswith``.
    if not index or not index.startswith('documents'):
        return

    # Transform record in DocumentRecord
    if not isinstance(record, DocumentRecord):
        record = DocumentRecord.get_record(record.id)

    # Check if record is open access.
    json['isOpenAccess'] = record.is_open_access()

    # Compile allowed IPs in document. Only the first organisation entry is
    # processed, as in the original implementation.
    if json.get('organisation'):
        organisation = json['organisation'][0]
        if organisation.get('allowedIps'):
            organisation['ips'] = get_ips_list(
                organisation['allowedIps'].split('\n'))
        else:
            organisation['ips'] = []

    # No files are present in record
    if not record.files:
        return

    # Store fulltext in array for indexing
    fulltext = []
    for record_file in record.files:
        if record_file.get('type') == 'fulltext':
            with record_file.file.storage().open() as stream:
                fulltext.append(stream.read().decode('utf-8'))
    json['fulltext'] = fulltext