def generate_csv():
    headers = dict.fromkeys(self.csv_included_fields)

    # Translate header values.
    for key in headers:
        headers[key] = _(key)

    # Write the CSV output in memory.
    line = Line()
    writer = csv.DictWriter(
        line, delimiter=';', quoting=csv.QUOTE_ALL, fieldnames=headers)
    writer.writerow(headers)
    yield line.read()
    for result in results['hits']['hits']:
        data = result['metadata']
        data['pid'] = result['id']
        self.format_row(data)
        # Write the CSV data for the row.
        data = self.process_dict(data)
        writer.writerow(data)
        yield line.read()
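# All of these CSV serializers write through a `Line` object instead of a
# real file so that each row can be yielded as soon as it is produced.
# The `Line` class itself is not part of this excerpt; the following is a
# minimal sketch of such a buffer (the name matches the calls above, the
# body is an assumption): csv.writer calls write() once per row, and
# read() hands the formatted line back to the generator.
class Line:
    """In-memory one-line buffer used as the csv writer's file object."""

    def __init__(self):
        self._line = ''

    def write(self, line):
        """Store the single CSV line produced by the writer."""
        self._line = line

    def read(self):
        """Return the last line written."""
        return self._line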
def _format_csv(self, records):
    """Yield the records as CSV lines."""
    assert len(records) == 1
    record = records[0]
    if record['metadata'].get('type') == Patron.ROLE_LIBRARIAN:
        # statistics of type librarian
        headers = [
            key.capitalize().replace('_', ' ')
            for key in self.ordered_keys
        ]
        line = Line()
        writer = csv.writer(line)
        writer.writerow(headers)
        yield line.read()
        # sort by library name
        values = sorted(
            record['metadata']['values'],
            key=lambda v: v['library']['name'])
        for value in values:
            library = value['library']
            value['library name'] = library['name']
            value['library id'] = library['pid']
            del value['library']
            # flatten nested dicts into a readable multi-line text
            for v in value:
                if isinstance(value[v], dict):
                    dict_to_text = ''
                    for k, m in value[v].items():
                        dict_to_text += f'{k}: {m}\r\n'
                    value[v] = dict_to_text
            value = StatCSVSerializer.sort_dict_by_key(value)[1]
            writer.writerow(value)
            yield line.read()
    else:
        # statistics of type billing
        # build a unique list of all record keys as CSV headers
        headers = {'library name', 'library id'}
        for value in record['metadata']['values']:
            headers.update(
                [v for v in value.keys() if v != 'library'])
        # write the CSV output in memory
        line = Line()
        writer = csv.DictWriter(line, fieldnames=sorted(headers))
        writer.writeheader()
        yield line.read()
        # sort by library name
        values = sorted(
            record['metadata']['values'],
            key=lambda v: v['library']['name'])
        for value in values:
            library = value['library']
            value['library name'] = library['name']
            value['library id'] = library['pid']
            del value['library']
            # flatten nested dicts into a readable multi-line text
            for v in value:
                if isinstance(value[v], dict):
                    dict_to_text = ''
                    for k, m in value[v].items():
                        dict_to_text += f'{k}: {m}\r\n'
                    value[v] = dict_to_text
            writer.writerow(value)
            yield line.read()
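# The librarian branch above relies on `StatCSVSerializer.sort_dict_by_key`
# to put the row values into the same order as the headers. That helper is
# not shown in this excerpt; assuming it returns a (keys, values) pair
# (hence the `[1]` at the call site) ordered by a fixed column list, a
# hypothetical sketch could be (the `ordered_keys` parameter is an
# assumption; the real helper presumably bakes in its own key order):
@staticmethod
def sort_dict_by_key(value, ordered_keys):
    """Hypothetical sketch: order a row dict to match the CSV headers.

    :param value: row dict whose keys are a subset of ordered_keys.
    :param ordered_keys: the column order used to build the headers.
    :return: a (keys, values) tuple; callers keep index 1 as the row.
    """
    items = sorted(
        value.items(), key=lambda kv: ordered_keys.index(kv[0]))
    return [k for k, _ in items], [v for _, v in items]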
def _format_csv(self, records):
    """Yield the records as CSV lines.

    :param records: Records metadata to format.
    """
    # build a unique list of all keys in included fields as CSV headers
    headers = dict.fromkeys(self.csv_included_fields)

    # write the CSV output in memory
    line = Line()
    writer = csv.DictWriter(
        line, quoting=csv.QUOTE_ALL, fieldnames=headers)
    writer.writeheader()
    yield line.read()
    for record in records:
        writer.writerow(record)
        yield line.read()
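# These generators are typically consumed by a streaming HTTP response in
# the Flask application hosting the serializers. A hypothetical usage
# sketch (the view function and filename are assumptions, not part of the
# original code); streaming avoids building the whole CSV in memory:
from flask import Response, stream_with_context

def stream_csv(serializer, records):
    """Stream the CSV generator as an attachment download."""
    return Response(
        stream_with_context(serializer._format_csv(records)),
        mimetype='text/csv',
        headers={'Content-Disposition': 'attachment; filename=export.csv'})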
def _format_csv(self, records):
    """Yield the records as CSV lines.

    :param list records: Records list.
    """
    assert len(records) == 1
    record = records[0]
    headers = [
        'organisation', 'type', 'documents', 'full_text', 'no_full_text'
    ]

    # write the header
    line = Line()
    writer = csv.DictWriter(line, fieldnames=headers)
    writer.writeheader()
    yield line.read()

    # dump the values
    for value in record['metadata']['values']:
        value['documents'] = len(value['pids'])
        value['no_full_text'] = value['documents'] - value['full_text']
        value.pop('pids', None)
        writer.writerow(value)
        yield line.read()
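# A quick illustration of the record shape this serializer expects and the
# arithmetic it applies (all values below are invented for illustration):
record = {
    'metadata': {
        'values': [{
            'organisation': 'org1',
            'type': 'collection_1',
            'pids': ['doc1', 'doc2', 'doc3'],
            'full_text': 2,
        }]
    }
}
# For that value the serializer computes:
#   documents    = len(pids)             -> 3
#   no_full_text = documents - full_text -> 1
# and yields the CSV:
#   organisation,type,documents,full_text,no_full_text
#   org1,collection_1,3,2,1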
def generate_csv():
    def batch(results):
        """Chunk search results.

        :param results: search results.
        :return: chunked item pids and search records.
        """
        records = []
        pids = []
        for result in results:
            pids.append(result.pid)
            records.append(result)
            if len(records) % chunk_size == 0:
                yield pids, records
                pids = []
                records = []
        # avoid yielding an empty final batch
        if records:
            yield pids, records

    def get_documents_by_item_pids(item_pids):
        """Get documents for the given item pid list."""

        def _build_doc(data):
            document_data = {
                'document_title': next(
                    filter(
                        lambda x: x.get('type') == 'bf:Title',
                        data.get('title')
                    )
                ).get('_text')
            }
            # process contributions
            creator = []
            if 'contribution' in data:
                for contribution in data.get('contribution'):
                    if any(role in contribution.get('role')
                           for role in role_filter):
                        authorized_access_point = \
                            f'authorized_access_point_{language}'
                        if authorized_access_point in \
                                contribution.get('agent'):
                            creator.append(
                                contribution['agent']
                                [authorized_access_point])
            document_data['document_creator'] = ' ; '.join(creator)
            document_main_type = []
            document_sub_type = []
            for document_type in data.get('type'):
                document_main_type.append(
                    document_type.get('main_type'))
                document_sub_type.append(
                    document_type.get('subtype', ''))
            document_data['document_main_type'] = ', '.join(
                document_main_type)
            document_data['document_sub_type'] = ', '.join(
                document_sub_type)
            # TODO: build provision activity
            return document_data

        doc_search = DocumentsSearch() \
            .filter('terms', holdings__items__pid=list(item_pids)) \
            .source(
                ['pid', 'title', 'contribution', 'provisionActivity',
                 'type'])
        docs = {}
        for doc in doc_search.scan():
            docs[doc.pid] = _build_doc(doc.to_dict())
        return docs

    def get_loans_by_item_pids(item_pids):
        """Get loans for the given item pid list."""
        states = current_app.config['CIRCULATION_STATES_LOAN_ACTIVE']
        loan_search = LoansSearch() \
            .filter('terms', state=states) \
            .filter('terms', item_pid__value=item_pids) \
            .source(['pid', 'item_pid.value', 'start_date', 'end_date',
                     'state', '_created'])
        agg = A('terms', field='item_pid.value', size=chunk_size)
        loan_search.aggs.bucket('loans_count', agg)
        loan_search = loan_search.extra(
            collapse={
                'field': 'item_pid.value',
                'inner_hits': {
                    'name': 'most_recent',
                    'size': 1,
                    'sort': [{'_created': 'desc'}],
                }
            })
        # the default result size for the execute method is 10;
        # we need to set it to the chunk size
        results = loan_search[0:chunk_size].execute()
        agg_buckets = {}
        for result in results.aggregations.loans_count.buckets:
            agg_buckets[result.key] = result.doc_count
        loans = {}
        for loan_hit in results:
            # get the most recent loan
            loan_data = loan_hit.meta.inner_hits.most_recent[0].to_dict()
            item_pid = loan_data['item_pid']['value']
            loans[item_pid] = {
                'loans_count': agg_buckets.get(item_pid, 0),
                'last_transaction_date': ciso8601.parse_datetime(
                    loan_data['_created']).date()
            }
            if loan_data.get('state') == LoanState.ITEM_ON_LOAN:
                loans[item_pid]['checkout_date'] = \
                    ciso8601.parse_datetime(loan_data['start_date']).date()
                loans[item_pid]['due_date'] = \
                    ciso8601.parse_datetime(loan_data['end_date']).date()
        return loans

    headers = dict.fromkeys(self.csv_included_fields)

    # write the CSV output in memory
    line = Line()
    writer = csv.DictWriter(
        line, quoting=csv.QUOTE_ALL, fieldnames=headers)
    writer.writeheader()
    yield line.read()
    for pids, batch_results in batch(search_result):
        # get documents and loans for the current batch
        documents = get_documents_by_item_pids(pids)
        loans = get_loans_by_item_pids(pids)
        for hit in batch_results:
            csv_data = hit.to_dict()
            csv_data['library_name'] = \
                libraries_map[hit['library']['pid']]
            csv_data['location_name'] = \
                locations_map[hit['location']['pid']]
            try:
                # update csv data with the document
                csv_data.update(documents.get(hit['document']['pid']))
            except Exception as err:
                current_app.logger.error(
                    f'ERROR in csv serializer: {err} '
                    f'on document: {hit["document"]["pid"]}')
            # update csv data with the loan
            csv_data.update(loans.get(hit['pid'], {'loans_count': 0}))
            # process item type and temporary item type
            csv_data['item_type'] = \
                item_types_map[hit['item_type']['pid']]
            temporary_item_type = csv_data.get('temporary_item_type')
            if temporary_item_type:
                csv_data['temporary_item_type'] = \
                    item_types_map[temporary_item_type['pid']]
                csv_data['temporary_item_type_end_date'] = \
                    temporary_item_type.get('end_date')
            # process notes
            for note in csv_data.get('notes', []):
                if any(note_type in note.get('type')
                       for note_type in
                       ItemNoteTypes.INVENTORY_LIST_CATEGORY):
                    csv_data[note.get('type')] = note.get('content')
            csv_data['created'] = ciso8601.parse_datetime(
                hit['_created']).date()
            # process item issue
            if hit['type'] == 'issue':
                issue = csv_data['issue']
                if issue.get('inherited_first_call_number') \
                        and not csv_data.get('call_number'):
                    csv_data['call_number'] = \
                        issue.get('inherited_first_call_number')
                csv_data['issue_status'] = issue.get('status')
                if issue.get('status_date'):
                    csv_data['issue_status_date'] = \
                        ciso8601.parse_datetime(
                            issue.get('status_date')).date()
                csv_data['issue_claims_count'] = \
                    issue.get('claims_count', 0)
                csv_data['issue_expected_date'] = \
                    issue.get('expected_date')
                csv_data['issue_regular'] = issue.get('regular')
            # prevent a key error in the DictWriter
            del csv_data['type']
            # write csv data
            data = self.process_dict(csv_data)
            writer.writerow(data)
            yield line.read()
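# Both inventory serializers filter each row through `self.process_dict`
# before handing it to the DictWriter. The helper is not shown in this
# excerpt; since csv.DictWriter raises a ValueError for keys missing from
# `fieldnames`, a minimal sketch (an assumption, not the original code)
# would keep only the declared columns:
def process_dict(self, dictionary):
    """Hypothetical sketch: keep only the fields declared as CSV headers."""
    return {
        key: value for key, value in dictionary.items()
        if key in self.csv_included_fields
    }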