Example #1
        def generate_csv():
            # map each field name to its translated label for the header row
            headers = {key: _(key) for key in self.csv_included_fields}

            # Write the CSV output in memory
            line = Line()
            writer = csv.DictWriter(line,
                                    delimiter=';',
                                    quoting=csv.QUOTE_ALL,
                                    fieldnames=headers)
            writer.writerow(headers)
            yield line.read()

            for result in results['hits']['hits']:
                data = result['metadata']
                data['pid'] = result['id']

                self.format_row(data)

                # Write CSV data for row.
                data = self.process_dict(data)
                writer.writerow(data)
                yield line.read()
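
All five snippets write rows through a Line object that is never defined in these excerpts. Judging from how read() is called right after every writerow(), it is a one-row buffer exposing the file-like write() method that the csv writers require. A minimal sketch of such a helper (an assumption based on the usage above, not code from the project):

    class Line:
        """File-like buffer that keeps only the most recently written line."""

        def __init__(self):
            self._line = ''

        def write(self, line):
            # csv.writer / csv.DictWriter call write() once per row,
            # line terminator included
            self._line = line

        def read(self):
            # hand the buffered row back so the generator can yield it
            return self._line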
Example #2
    def _format_csv(self, records):
        """Return the list of records as a CSV string."""
        # build a unique list of all records keys as CSV headers
        assert len(records) == 1
        record = records[0]

        if record['metadata'].get('type') == Patron.ROLE_LIBRARIAN:
            # statistics of type librarian
            headers = [
                key.capitalize().replace('_', ' ') for key in self.ordered_keys
            ]
            line = Line()
            writer = csv.writer(line)
            writer.writerow(headers)
            yield line.read()
            values = sorted(record['metadata']['values'],
                            key=lambda v: v['library']['name'])

            for value in values:
                library = value['library']
                value['library name'] = library['name']
                value['library id'] = library['pid']
                del value['library']
                for v in value:
                    if isinstance(value[v], dict):
                        dict_to_text = ''
                        for k, m in value[v].items():
                            dict_to_text += f'{k}: {m}\r\n'
                        value[v] = dict_to_text
                value = StatCSVSerializer.sort_dict_by_key(value)[1]
                writer.writerow(value)
                yield line.read()
        else:
            # statistics of type billing
            headers = {'library name', 'library id'}
            for value in record['metadata']['values']:
                headers.update([v for v in value.keys() if v != 'library'])

            # write the CSV output in memory
            line = Line()
            writer = csv.DictWriter(line, fieldnames=sorted(headers))
            writer.writeheader()
            yield line.read()
            # sort by library name
            values = sorted(record['metadata']['values'],
                            key=lambda v: v['library']['name'])

            for value in values:
                library = value['library']
                value['library name'] = library['name']
                value['library id'] = library['pid']
                del value['library']
                for v in value:
                    if isinstance(value[v], dict):
                        dict_to_text = ''
                        for k, m in value[v].items():
                            dict_to_text += f'{k}: {m}\r\n'
                        value[v] = dict_to_text
                writer.writerow(value)
                yield line.read()
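
The inner loop above flattens any nested dict value into a multi-line text cell before the row is written. A standalone sketch of that step, with illustrative data that is not taken from the project:

    value = {'library name': 'Main library',
             'checkouts': {'2021': 4, '2022': 7}}
    for key in value:
        if isinstance(value[key], dict):
            # one 'k: v' pair per line inside the single CSV cell
            value[key] = ''.join(f'{k}: {m}\r\n'
                                 for k, m in value[key].items())
    print(repr(value['checkouts']))  # '2021: 4\r\n2022: 7\r\n'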
Example #3
File: csv.py Project: lauren-d/rero-ils
    def _format_csv(self, records):
        """Return the list of records as a CSV string.

        :param records: Records metadata to format.
        """
        # build a unique list of all keys in included fields as CSV headers
        headers = dict.fromkeys(self.csv_included_fields)
        # write the CSV output in memory
        line = Line()
        writer = csv.DictWriter(line,
                                quoting=csv.QUOTE_ALL,
                                fieldnames=headers)
        writer.writeheader()
        yield line.read()

        for record in records:
            writer.writerow(record)
            yield line.read()
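
Because _format_csv() is a generator, the response can be streamed to the client without ever holding the full CSV in memory. A hypothetical Flask wiring of the snippets above (the view, the serializer argument and the filename are illustrative placeholders, not project code):

    from flask import Response, stream_with_context

    def export_csv(serializer, records):
        # serializer._format_csv is any of the generators shown above
        return Response(
            stream_with_context(serializer._format_csv(records)),
            mimetype='text/csv',
            headers={'Content-Disposition':
                     'attachment; filename="export.csv"'})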
Example #4
    def _format_csv(self, records):
        """Return the list of records as a CSV string.

        :param list recors: Records list
        """
        assert len(records) == 1
        record = records[0]
        headers = [
            'organisation', 'type', 'documents', 'full_text', 'no_full_text'
        ]

        # Write header
        line = Line()
        writer = csv.DictWriter(line, fieldnames=headers)
        writer.writeheader()
        yield line.read()

        # Dump values
        for value in record['metadata']['values']:
            value['documents'] = len(value['pids'])
            value['no_full_text'] = value['documents'] - value['full_text']
            value.pop('pids', None)
            writer.writerow(value)
            yield line.read()
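
The two derived columns come straight from the pids list before it is dropped. A quick standalone check with made-up numbers:

    value = {'organisation': 'org1', 'type': 'ebook',
             'full_text': 2, 'pids': ['doc1', 'doc2', 'doc3']}
    value['documents'] = len(value['pids'])                          # 3
    value['no_full_text'] = value['documents'] - value['full_text']  # 1
    value.pop('pids', None)
    # value now carries exactly the five header fields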
Example #5
        def generate_csv():
            def batch(results):
                """Chunk search results.

                :param results: search results.
                :return: chunked lists of item pids and search records.
                """
                records = []
                pids = []
                for result in results:
                    pids.append(result.pid)
                    records.append(result)
                    if len(records) % chunk_size == 0:
                        yield pids, records
                        pids = []
                        records = []
                # yield the remainder of the final, partial chunk
                if records:
                    yield pids, records

            def get_documents_by_item_pids(item_pids):
                """Get documents for the given item pid list."""
                def _build_doc(data):
                    title = next(
                        filter(lambda x: x.get('type') == 'bf:Title',
                               data.get('title')))
                    document_data = {'document_title': title.get('_text')}
                    # process contributions
                    creator = []
                    if 'contribution' in data:
                        for contribution in data.get('contribution'):
                            if any(role in contribution.get('role')
                                   for role in role_filter):
                                authorized_access_point = \
                                    f'authorized_access_point_{language}'
                                if authorized_access_point in contribution\
                                        .get('agent'):
                                    creator.append(contribution['agent']
                                                   [authorized_access_point])
                    document_data['document_creator'] = ' ; '.join(creator)
                    document_main_type = []
                    document_sub_type = []
                    for document_type in data.get('type'):
                        document_main_type.append(
                            document_type.get('main_type'))
                        document_sub_type.append(
                            document_type.get('subtype', ''))
                    document_data['document_main_type'] = ', '.join(
                        document_main_type)
                    document_data['document_sub_type'] = ', '.join(
                        document_sub_type)
                    # TODO : build provision activity
                    return document_data

                doc_search = DocumentsSearch() \
                    .filter('terms', holdings__items__pid=list(item_pids)) \
                    .source(['pid', 'title', 'contribution',
                             'provisionActivity', 'type'])
                docs = {}
                for doc in doc_search.scan():
                    docs[doc.pid] = _build_doc(doc.to_dict())
                return docs

            def get_loans_by_item_pids(item_pids):
                """Get loans for the given item pid list."""
                states = \
                    current_app.config['CIRCULATION_STATES_LOAN_ACTIVE']
                loan_search = LoansSearch() \
                    .filter('terms', state=states) \
                    .filter('terms', item_pid__value=item_pids) \
                    .source(['pid', 'item_pid.value', 'start_date',
                            'end_date', 'state', '_created'])
                agg = A('terms', field='item_pid.value', size=chunk_size)
                loan_search.aggs.bucket('loans_count', agg)

                loan_search = loan_search.extra(
                    collapse={
                        'field': 'item_pid.value',
                        "inner_hits": {
                            "name": "most_recent",
                            "size": 1,
                            "sort": [{
                                "_created": "desc"
                            }],
                        }
                    })
                # the default result size of execute() is 10;
                # widen the slice to the chunk size
                results = loan_search[0:chunk_size].execute()
                agg_buckets = {}
                for result in results.aggregations.loans_count.buckets:
                    agg_buckets[result.key] = result.doc_count
                loans = {}
                for loan_hit in results:
                    # get most recent loans
                    loan_data = loan_hit.meta.inner_hits.most_recent[0]\
                        .to_dict()
                    item_pid = loan_data['item_pid']['value']
                    loans[item_pid] = {
                        'loans_count': agg_buckets.get(item_pid, 0),
                        'last_transaction_date': ciso8601.parse_datetime(
                            loan_data['_created']).date()
                    }
                    if loan_data.get('state') == LoanState.ITEM_ON_LOAN:
                        loans[item_pid]['checkout_date'] = ciso8601.\
                            parse_datetime(loan_data['start_date']).date()
                        loans[item_pid]['due_date'] = ciso8601.\
                            parse_datetime(loan_data['end_date']).date()
                return loans

            headers = dict.fromkeys(self.csv_included_fields)

            # write the CSV output in memory
            line = Line()
            writer = csv.DictWriter(line,
                                    quoting=csv.QUOTE_ALL,
                                    fieldnames=headers)
            writer.writeheader()
            yield line.read()

            for pids, batch_results in batch(search_result):
                # get documents
                documents = get_documents_by_item_pids(pids)

                # get loans
                loans = get_loans_by_item_pids(pids)

                for hit in batch_results:
                    csv_data = hit.to_dict()
                    csv_data['library_name'] = \
                        libraries_map[hit['library']['pid']]
                    csv_data['location_name'] = \
                        locations_map[hit['location']['pid']]

                    try:
                        # update csv data with document
                        csv_data.update(documents.get(hit['document']['pid']))
                    except Exception as err:
                        current_app.logger.error(
                            f'ERROR in csv serializer: {err} '
                            f"on document: {hit['document']['pid']}")

                    # update csv data with loan
                    csv_data.update(loans.get(hit['pid'], {'loans_count': 0}))

                    # process item type and temporary item type
                    csv_data['item_type'] = \
                        item_types_map[hit['item_type']['pid']]
                    temporary_item_type = csv_data.get('temporary_item_type')
                    if temporary_item_type:
                        csv_data['temporary_item_type'] = item_types_map[
                            temporary_item_type['pid']]
                        csv_data['temporary_item_type_end_date'] = \
                            temporary_item_type.get('end_date')

                    # process note
                    for note in csv_data.get('notes', []):
                        if any(note_type in note.get('type') for note_type in
                               ItemNoteTypes.INVENTORY_LIST_CATEGORY):
                            csv_data[note.get('type')] = note.get('content')

                    csv_data['created'] = ciso8601.parse_datetime(
                        hit['_created']).date()

                    # process item issue
                    if hit['type'] == 'issue':
                        issue = csv_data['issue']
                        if issue.get('inherited_first_call_number') \
                                and not csv_data.get('call_number'):
                            csv_data['call_number'] = \
                                issue.get('inherited_first_call_number')
                        csv_data['issue_status'] = issue.get('status')
                        if issue.get('status_date'):
                            csv_data['issue_status_date'] = \
                                ciso8601.parse_datetime(
                                    issue.get('status_date')).date()
                        csv_data['issue_claims_count'] = \
                            issue.get('claims_count', 0)
                        csv_data['issue_expected_date'] = \
                            issue.get('expected_date')
                        csv_data['issue_regular'] = issue.get('regular')

                    # drop the raw 'type' field; pop() avoids a KeyError
                    # if the field is missing
                    csv_data.pop('type', None)

                    # write csv data
                    data = self.process_dict(csv_data)
                    writer.writerow(data)
                    yield line.read()
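
Nothing in the generator ties it to HTTP: any consumer that iterates it receives the CSV line by line. A hypothetical local export, assuming generate_csv is the closure defined above:

    # newline='' because the csv writer already emits '\r\n' terminators
    with open('inventory.csv', 'w', encoding='utf-8', newline='') as fh:
        for row in generate_csv():
            fh.write(row)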