def test_import_records(mock_record_by_identifier, app, document_json,
                        bucket_location):
    """Exercise ``import_records`` for creation, update and failure.

    The lookup-by-identifier mock is reconfigured before each call to
    ``import_records`` so that the same document payload goes through three
    scenarios in sequence.
    """

    def exception_side_effect(data):
        """Side effect making the lookup mock raise on every call."""
        raise Exception("No record found for identifier")

    # Attach a file description to the document that will be imported.
    document_json['files'] = [{
        'key': 'test.pdf',
        'url': 'http://some.url/file.pdf'
    }]

    # Scenario 1: the lookup returns nothing; the import must yield a record
    # that can be loaded back and that carries a truthy 'harvested' value.
    mock_record_by_identifier.return_value = None
    created_ids = import_records([document_json])
    imported = DocumentRecord.get_record(created_ids[0])
    assert imported
    assert imported['harvested']

    # Scenario 2: the lookup returns the record imported above; the import
    # must still yield an identifier that resolves to a record.
    mock_record_by_identifier.return_value = imported
    updated_ids = import_records([document_json])
    assert DocumentRecord.get_record(updated_ids[0])

    # Scenario 3: the lookup raises; no identifier must be returned.
    mock_record_by_identifier.side_effect = exception_side_effect
    failed_ids = import_records([document_json])
    assert not failed_ids
def populate_fulltext_field(sender=None, record=None, json=None, index=None,
                            **kwargs):
    """Receive a signal before record is indexed, to add fulltext.

    This function is called just before a record is sent to index. For
    records going to an index whose name starts with ``documents``, the
    content of every file of type ``fulltext`` attached to the record is
    read, decoded as UTF-8 and appended to ``json['fulltext']``.

    Nothing is done when the index name is missing, when it does not start
    with ``documents``, or when the record has no files.

    :param sender: Sender of the signal.
    :param Record record: Record to index.
    :param dict json: JSON that will be indexed (modified in place).
    :param str index: Name of the index in which record will be sent.
    """
    # Takes care only about documents indexing. ``index`` defaults to None,
    # so a missing name is treated as "not a documents index" instead of
    # failing on ``None.startswith``.
    if not index or not index.startswith('documents'):
        return

    # Transform record in DocumentRecord
    if not isinstance(record, DocumentRecord):
        record = DocumentRecord.get_record(record.id)

    # No files are present in record
    if not record.files:
        return

    # Store fulltext in array for indexing
    json['fulltext'] = []
    for record_file in record.files:
        if record_file.get('type') == 'fulltext':
            with record_file.file.storage().open() as pdf_file:
                json['fulltext'].append(pdf_file.read().decode('utf-8'))
def enrich_document_data(sender=None, record=None, json=None, index=None,
                         **kwargs):
    """Receive a signal before record is indexed, to enrich indexed data.

    This function is called just before a record is sent to index. For
    records going to an index whose name starts with ``documents``, the JSON
    to be indexed is completed in place with:

    * ``isOpenAccess``: the result of ``record.is_open_access()``.
    * ``organisation[0]['ips']``: the list built by ``get_ips_list`` from the
      newline-separated ``allowedIps`` value of the first organisation, or an
      empty list when that value is missing or empty. Only the first
      organisation entry is processed.
    * ``fulltext``: the UTF-8 decoded content of every record file of type
      ``fulltext`` (only set when the record has files).

    Nothing is done when the index name is missing or does not start with
    ``documents``.

    :param sender: Sender of the signal.
    :param Record record: Record to index.
    :param dict json: JSON that will be indexed (modified in place).
    :param str index: Name of the index in which record will be sent.
    """
    # Takes care only about documents indexing. ``index`` defaults to None,
    # so a missing name is treated as "not a documents index" instead of
    # failing on ``None.startswith``.
    if not index or not index.startswith('documents'):
        return

    # Transform record in DocumentRecord
    if not isinstance(record, DocumentRecord):
        record = DocumentRecord.get_record(record.id)

    # Check if record is open access.
    json['isOpenAccess'] = record.is_open_access()

    # Compile allowed IPs in document
    if json.get('organisation'):
        organisation = json['organisation'][0]
        if organisation.get('allowedIps'):
            organisation['ips'] = get_ips_list(
                organisation['allowedIps'].split('\n'))
        else:
            organisation['ips'] = []

    # No files are present in record
    if not record.files:
        return

    # Store fulltext in array for indexing
    json['fulltext'] = []
    for record_file in record.files:
        if record_file.get('type') == 'fulltext':
            with record_file.file.storage().open() as pdf_file:
                json['fulltext'].append(pdf_file.read().decode('utf-8'))