Example 1
class ExcelExporter:
    def __init__(self, entries, decoupled=True, project_id=None, is_preview=False):
        self.is_preview = is_preview
        self.wb = WorkBook()
        # XXX: Limit memory usage? (Or use redis?)
        self.geoarea_data_cache = {}

        # Create worksheets (Main, Grouped, Entry Groups, Bibliography)
        if decoupled:
            self.split = self.wb.get_active_sheet()\
                .set_title('Split Entries')
            self.group = self.wb.create_sheet('Grouped Entries')
        else:
            self.split = None
            self.group = self.wb.get_active_sheet().set_title('Entries')

        self.entry_groups_sheet = self.wb.create_sheet('Entry Groups')
        self.decoupled = decoupled
        self.bibliography_sheet = self.wb.create_sheet('Bibliography')
        self.bibliography_data = {}

        self.modified_excerpt_exists = entries.filter(excerpt_modified=True).exists()

        # Initial titles
        self.titles = [
            'Date of Lead Publication',
            'Imported By',
            'Date Imported',
            'Verification Status',
            'Lead Id',
            'Lead Title',
            'Lead URL',
            'Authoring Organizations Type',
            'Author',
            'Source',
            'Lead Priority',
            'Assignee',
            'Entry Id',
            'Lead-Entry Id',
            *(
                [
                    'Modified Excerpt',
                    'Original Excerpt',
                ] if self.modified_excerpt_exists else
                ['Excerpt']
            )
        ]

        self.lead_id_titles_map = {x.id: x.title for x in Lead.objects.filter(project_id=project_id)}

        project_entry_labels = ProjectEntryLabel.objects.filter(
            project_id=project_id
        ).order_by('order')

        self.label_id_title_map = {x.id: x.title for x in project_entry_labels}

        lead_groups = LeadEntryGroup.objects.filter(lead__project_id=project_id).order_by('order')
        self.group_id_title_map = {x.id: x.title for x in lead_groups}
        # Create matrix of labels and groups
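        # Shape: {(lead_id, group_id): {label_id: None, ...}}; the None cells
        # are filled with entry hyperlinks later, in add_entries()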

        self.group_label_matrix = {
            (group.lead_id, group.id): {x.id: None for x in project_entry_labels}
            for group in lead_groups
        }

        self.entry_group_titles = [
            'Lead',
            'Group',
            *self.label_id_title_map.values(),
        ]
        self.entry_groups_sheet.append([self.entry_group_titles])

        self.col_types = {
            0: 'date',
            2: 'date',
        }

        # Keep track of sheet data present
        '''
        tabular_sheets = {
            'leadtitle-sheettitle': {
                'field1_title': col_num_in_sheet,
                'field2_title': col_num_in_sheet,
            }
        }
        '''
        self.tabular_sheets = {}

        # Keep track of tabular fields
        self.tabular_fields = {}

        self.region_data = {}
        # Mapping of original worksheet name to its (possibly truncated) name
        self._sheets = {}

    def load_exportable_titles(self, data, regions):
        export_type = data.get('type')
        col_type = data.get('col_type')

        if export_type == 'nested':
            children = data.get('children')
            for child in children:
                self.load_exportable_titles(child, regions)

        elif export_type == 'geo' and regions:
            self.region_data = {}

            for region in regions:
                admin_levels = region.adminlevel_set.all()
                admin_level_data = []

                self.titles.append(f'{region.title} Polygons')
                for admin_level in admin_levels:
                    self.titles.append(admin_level.title)
                    self.titles.append('{} (code)'.format(admin_level.title))

                    # Collect geo area names for each admin level
                    admin_level_data.append({
                        'id': admin_level.id,
                        'geo_area_titles': admin_level.get_geo_area_titles(),
                    })

                self.region_data[region.id] = admin_level_data

        elif export_type == 'multiple':
            index = len(self.titles)
            self.titles.extend(data.get('titles'))
            if col_type:
                for i in range(index, len(self.titles)):
                    self.col_types[i] = col_type[i - index]

        elif data.get('title'):
            index = len(self.titles)
            self.titles.append(data.get('title'))
            if col_type:
                self.col_types[index] = col_type

    def load_exportables(self, exportables, regions=None):
        # Take all exportables that contain excel info
        exportables = exportables.filter(
            data__excel__isnull=False,
        )

        # information_date_index = 1
        for exportable in exportables:
            # For each exportable, create titles according to type
            # and data
            data = exportable.data.get('excel')
            self.load_exportable_titles(data, regions)

        if self.decoupled and self.split:
            self.split.append([self.titles])
        self.group.append([self.titles])

        if self.decoupled and self.split:
            self.split.auto_fit_cells_in_row(1)
        self.group.auto_fit_cells_in_row(1)

        self.exportables = exportables
        self.regions = regions
        return self

    def add_entries_from_excel_data(self, rows, data, export_data):
        export_type = data.get('type')

        if export_type == 'nested':
            children = data.get('children')
            for i, child in enumerate(children):
                if export_data is None or i >= len(export_data):
                    _export_data = None
                else:
                    _export_data = export_data[i]
                self.add_entries_from_excel_data(
                    rows,
                    child,
                    _export_data,
                )

        elif export_type == 'multiple':
            col_span = len(data.get('titles'))
            if export_data:
                if export_data.get('type') == 'lists':
                    export_data_values = export_data.get('values')
                    rows_of_value_lists = []
                    for export_data_value in export_data_values:
                        # Handle for Matrix2D subsectors
                        # eg: ['dimension', 'subdimension', 'sector', ['sub-sector1', 'sub-sector2']]
                        #       -> ['dimension', 'subdimension', 'sector', 'sub-sector1']
                        #       -> ['dimension', 'subdimension', 'sector', 'sub-sector2']
                        if len(export_data_value) == 4 and isinstance(export_data_value[3], list):
                            if len(export_data_value[3]) > 0:
                                for subsector in export_data_value[3]:
                                    rows_of_value_lists.append(export_data_value[:3] + [subsector])
                            else:
                                rows_of_value_lists.append(export_data_value[:3] + [''])
                        elif len(export_data_value) != len(data.get('titles')):
                            titles_len = len(data.get('titles'))
                            values_len = len(export_data_value)
                            if titles_len > values_len:
                                # Add additional empty cells
                                rows_of_value_lists.append(export_data_value + [''] * (titles_len - values_len))
                            else:
                                # Remove extra cells
                                rows_of_value_lists.append(export_data_value[:titles_len])
                        else:
                            rows_of_value_lists.append(export_data_value)
                    rows.add_rows_of_value_lists(
                        # Skip rows where all values are None
                        [
                            x for x in rows_of_value_lists
                            if x is not None and not all(y is None for y in x)
                        ],
                        col_span,
                    )
                else:
                    rows.add_value_list(export_data.get('values'))
            else:
                rows.add_value_list([''] * col_span)

        elif export_type == 'geo' and self.regions:
            geo_id_values = []
            region_geo_polygons = {}
            if export_data:
                geo_id_values = [str(v) for v in export_data.get('values') or []]
                for geo_polygon in export_data.get('polygons') or []:
                    region_id = geo_polygon['region_id']
                    region_geo_polygons[region_id] = region_geo_polygons.get(region_id) or []
                    region_geo_polygons[region_id].append(geo_polygon['title'])

            for region in self.regions:
                admin_levels = self.region_data[region.id]
                geo_polygons = region_geo_polygons.get(region.id, [])
                max_levels = len(admin_levels)
                rows_value = []

                rows.add_rows_of_values(geo_polygons)

                for rev_level, admin_level in enumerate(admin_levels[::-1]):
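                    # Iterate admin levels in reverse of the order they were
                    # loaded; for each selected geo area the parent chain is
                    # walked to collect [code, title] pairs, and each row is
                    # reversed at the end so values line up with the per-level
                    # 'Title' / 'Title (code)' columns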
                    geo_area_titles = admin_level['geo_area_titles']
                    level = max_levels - rev_level
                    for geo_id in geo_id_values:
                        if geo_id not in geo_area_titles:
                            continue
                        if geo_id in self.geoarea_data_cache:
                            rows_value.append(self.geoarea_data_cache[geo_id])
                            continue

                        row_values = ['' for i in range(0, max_levels - level)] * 2

                        title = geo_area_titles[geo_id].get('title', '')
                        code = geo_area_titles[geo_id].get('code', '')
                        parent_id = geo_area_titles[geo_id].get('parent_id')

                        row_values.extend([code, title])
                        for _level in range(0, level - 1)[::-1]:
                            if parent_id:
                                _geo_area_titles = admin_levels[_level]['geo_area_titles']
                                _geo_area = _geo_area_titles.get(parent_id) or {}
                                _title = _geo_area.get('title', '')
                                _code = _geo_area.get('code', '')
                                parent_id = _geo_area.get('parent_id')
                                row_values.extend([_code, _title])
                            else:
                                row_values.extend(['', ''])
                        rows_value.append(row_values[::-1])
                        self.geoarea_data_cache[geo_id] = row_values[::-1]

                if len(rows_value) > 0:
                    rows.add_rows_of_value_lists(rows_value)
                else:
                    rows.add_rows_of_value_lists([['' for i in range(0, max_levels)] * 2])

        else:
            if export_data:
                if export_data.get('type') == 'list':
                    row_values = [
                        # Filter out missing (None) values from the excel row
                        x for x in export_data.get('value', [])
                        if x is not None
                    ]
                    rows.add_rows_of_values(row_values if row_values else [''])
                else:
                    rows.add_value(export_data.get('value'))
            else:
                rows.add_value('')

    def get_data_series(self, entry):
        lead = entry.lead
        field = entry.tabular_field

        if field is None:
            return ''
        self.tabular_fields[field.id] = field

        # The worksheet title is '<lead title>-<sheet title>'
        # Excel caps worksheet tab names at 31 characters, so longer titles are truncated
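        # e.g. a 45-character '<lead title>-<sheet title>' is stored as its
        # first 28 characters plus '-<current worksheet count>'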
        worksheet_title = '{}-{}'.format(lead.title, field.sheet.title)
        if not self._sheets.get(worksheet_title) and len(worksheet_title) > 31:
            self._sheets[worksheet_title] = '{}-{}'.format(
                worksheet_title[:28],
                len(self.wb.wb.worksheets)
            )
        elif not self._sheets.get(worksheet_title):
            self._sheets[worksheet_title] = worksheet_title
        worksheet_title = self._sheets[worksheet_title]

        if worksheet_title not in self.wb.wb.sheetnames:
            tabular_sheet = self.wb.create_sheet(worksheet_title).ws
        else:
            tabular_sheet = self.wb.wb.get_sheet_by_name(worksheet_title)

        # Get fields data
        worksheet_data = self.tabular_sheets.get(worksheet_title, {})

        col_number = worksheet_data.get(field.title)
        if col_number is None:
            # col_number None means the field is not in the worksheet yet,
            # so create a new column by assigning it the next column number
            cols_count = len(worksheet_data.keys())
            col_number = cols_count + 1
            worksheet_data[field.title] = col_number

            # Now add data to the column
            # excel_column_name converts number to excel column names: 1 -> A..
            sheet_col_name = excel_column_name(col_number)

            self.tabular_sheets[worksheet_title] = worksheet_data

            # Insert field title to sheet in first row
            tabular_sheet['{}1'.format(sheet_col_name)].value =\
                field.title

            # Add field values to corresponding column
            for i, x in enumerate(field.actual_data):
                tabular_sheet[
                    '{}{}'.format(sheet_col_name, 2 + i)
                ].value = x.get('processed_value') or x['value']
        else:
            sheet_col_name = excel_column_name(col_number)

        link = f'#\'{worksheet_title}\'!{sheet_col_name}1'
        return get_hyperlink(link, field.title)

    def get_entry_data(self, entry):
        if entry.entry_type == Entry.TagType.EXCERPT:
            return entry.excerpt

        if entry.entry_type == Entry.TagType.IMAGE:
            return entry.get_image_url()

        if entry.entry_type == Entry.TagType.DATA_SERIES:
            try:
                return self.get_data_series(entry)
            except Exception:
                logger.error(
                    'Data Series EXCEL Export Failed for entry',
                    exc_info=1,
                    extra={'data': {'entry_id': entry.pk}},
                )

        return ''

    def add_entries(self, entries):
        iterable_entries = entries[:Export.PREVIEW_ENTRY_SIZE] if self.is_preview else entries
        for i, entry in enumerate(iterable_entries):
            # Export each entry
            # Start building rows and export data for each exportable

            # ENTRY GROUP
            # Add it to appropriate row/column in self.group_label_matrix
            for group_label in entry.entrygrouplabel_set.all():
                key = (group_label.group.lead_id, group_label.group_id)
                entries_sheet_name = 'Grouped Entries' if self.decoupled else 'Entries'
                link = f'#\'{entries_sheet_name}\'!A{i+2}'
                self.group_label_matrix[key][group_label.label_id] = get_hyperlink(link, entry.excerpt[:50])

            lead = entry.lead
            assignee = entry.lead.get_assignee()
            author = lead.get_authors_display()
            source = lead.get_source_display()
            published_on = (lead.published_on and lead.published_on.strftime(EXPORT_DATE_FORMAT)) or ''
            url = lead.url

            self.bibliography_data[lead.id] = (author, source, published_on, url, lead.title)

            rows = RowsBuilder(self.split, self.group, self.decoupled)
            rows.add_value(format_date(lead.published_on))

            rows.add_value_list([
                entry.created_by and entry.created_by.profile.get_display_name(),
                format_date(entry.created_at.date()),
                'Controlled' if entry.controlled else 'Uncontrolled',
                f'{lead.id}',
                lead.title,
                lead.url or lead.generate_client_url(),
                lead.get_authoring_organizations_type_display(),
                lead.get_authors_display(),
                lead.get_source_display(),
                lead.get_priority_display(),
                assignee and assignee.profile.get_display_name(),
                f'{entry.id}',
                f'{lead.id}-{entry.id}',
                *(
                    [
                        self.get_entry_data(entry),
                        entry.dropped_excerpt,
                    ] if self.modified_excerpt_exists else
                    [self.get_entry_data(entry)]
                )
            ])

            for exportable in self.exportables:
                # Get export data for this entry corresponding to this
                # exportable.

                # And write some value based on type and data
                # or empty strings if no data.

                data = exportable.data.get('excel')
                export_data = ExportData.objects.filter(
                    exportable=exportable,
                    entry=entry,
                    data__excel__isnull=False,
                ).first()

                # TODO: handle for conditional widget
                if export_data and type(export_data.data.get('excel', {})) == list:
                    export_data = export_data.data.get('excel', [])
                else:
                    export_data = export_data and {
                        **export_data.data.get('common', {}),
                        **export_data.data.get('excel', {})
                    }
                self.add_entries_from_excel_data(rows, data, export_data)

            rows.apply()

        # Now add data to entry group sheet
        for (leadid, gid), labeldata in self.group_label_matrix.items():
            row_data = [
                self.lead_id_titles_map.get(leadid), self.group_id_title_map.get(gid),
                *labeldata.values()
            ]
            self.entry_groups_sheet.append([row_data])
        return self

    def add_bibliography_sheet(self):
        self.bibliography_sheet.append([['Author', 'Source', 'Published Date', 'Title']])
        for author, source, published, url, title in self.bibliography_data.values():
            self.bibliography_sheet.append(
                [[author, source, published, get_hyperlink(url, title) if url else title]]
            )

    def export(self):
        """
        Export and return export data
        """
        self.group.set_col_types(self.col_types)
        if self.split:
            self.split.set_col_types(self.col_types)

        # Add bibliography
        self.add_bibliography_sheet()

        buffer = self.wb.save()
        return ContentFile(buffer)
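
A minimal usage sketch for the exporter above; entries, exportables, regions and project are hypothetical placeholders, and the surrounding call site is not part of this example:

    exporter = ExcelExporter(entries, project_id=project.id)
    exporter.load_exportables(exportables, regions=regions)  # builds the column titles
    exporter.add_entries(entries)                            # writes one row per entry
    excel_file = exporter.export()                           # ContentFile with the .xlsx bytes
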
Example 2
class ExcelExporter:
    def __init__(self, decoupled=True):
        self.wb = WorkBook()

        # Create two worksheets
        if decoupled:
            self.split = self.wb.get_active_sheet()\
                .set_title('Split Entries')
            self.group = self.wb.create_sheet('Grouped Entries')
        else:
            self.split = None
            self.group = self.wb.get_active_sheet().set_title('Entries')
        self.decoupled = decoupled

        # Initial titles
        self.titles = [
            'Date of Lead Publication',
            'Imported By',
            'Date Imported',
            'Lead Title',
            'Source',
            'Assignee',
            'Excerpt',
        ]

    def load_exportables(self, exportables, regions=None):
        # Take all exportables that contain excel info
        exportables = exportables.filter(
            data__excel__isnull=False,
        )

        # information_date_index = 1
        for exportable in exportables:
            # For each exportable, create titles according to type
            # and data

            data = exportable.data.get('excel')
            export_type = data.get('type')

            # if export_type == 'information-date':
            #     self.titles.insert(
            #         information_date_index,
            #         data.get('title'),
            #     )
            #     information_date_index += 1

            if export_type == 'geo' and regions:
                for region in regions:
                    admin_levels = region.adminlevel_set.all()
                    for admin_level in admin_levels:
                        self.titles.append(admin_level.title)

            elif export_type == 'multiple':
                self.titles.extend(data.get('titles'))

            elif data.get('title'):
                self.titles.append(data.get('title'))

        if self.decoupled and self.split:
            self.split.append([self.titles])
        self.group.append([self.titles])

        if self.decoupled and self.split:
            self.split.auto_fit_cells_in_row(1)
        self.group.auto_fit_cells_in_row(1)

        self.exportables = exportables
        self.regions = regions
        return self

    def add_entries(self, entries):
        for entry in entries:
            # Export each entry
            # Start building rows and export data for each exportable

            rows = RowsBuilder(self.split, self.group, self.decoupled)
            rows.add_value(format_date(entry.lead.published_on))

            # TODO Check for information dates

            assignee = entry.lead.get_assignee()
            rows.add_value_list([
                entry.created_by.profile.get_display_name(),
                format_date(entry.created_at.date()),
                entry.lead.title,
                entry.lead.source,
                assignee and assignee.profile.get_display_name(),
                entry.excerpt
                if entry.entry_type == Entry.EXCERPT
                else 'IMAGE',
            ])

            for exportable in self.exportables:
                # Get export data for this entry corresponding to this
                # exportable

                # And write some value based on type and data
                # or empty strings if no data

                data = exportable.data.get('excel')
                export_data = ExportData.objects.filter(
                    exportable=exportable,
                    entry=entry,
                    data__excel__isnull=False,
                ).first()

                export_data = export_data and export_data.data.get('excel')
                export_type = data.get('type')

                if export_type == 'multiple':
                    col_span = len(data.get('titles'))
                    if export_data:
                        if export_data.get('type') == 'lists':
                            rows.add_rows_of_value_lists(
                                export_data.get('values'),
                                col_span,
                            )
                        else:
                            rows.add_value_list(
                                export_data.get('values'),
                            )
                    else:
                        rows.add_value_list([''] * col_span)

                elif export_type == 'geo' and self.regions:
                    values = []
                    if export_data:
                        values = export_data.get('values', [])
                        values = [int(v) for v in values]

                    for region in self.regions:
                        admin_levels = region.adminlevel_set.all()

                        for admin_level in admin_levels:
                            geo_data = GeoArea.objects.filter(
                                admin_level=admin_level,
                                id__in=values,
                            ).distinct()
                            if geo_data.count() > 0:
                                rows.add_rows_of_values([
                                    g.title for g in geo_data
                                ])
                            else:
                                rows.add_value('')

                else:
                    if export_data:
                        if export_data.get('type') == 'list':
                            rows.add_rows_of_values(export_data.get('value'))
                        else:
                            rows.add_value(export_data.get('value'))
                    else:
                        rows.add_value('')

            rows.apply()
        return self

    def export(self, export_entity):
        buffer = self.wb.save()
        filename = generate_filename('Entries Export', 'xlsx')

        export_entity.title = filename
        export_entity.type = 'entries'
        export_entity.format = 'xlsx'
        export_entity.pending = False
        export_entity.mime_type = EXCEL_MIME_TYPE

        export_entity.file.save(filename, ContentFile(buffer))
        export_entity.save()
Example 3
class ExcelExporter:
    """
    NOTE: Legacy exporter (Not used)
    """
    def __init__(self, decoupled=True):
        self.wb = WorkBook()

        # Create two worksheets
        if decoupled:
            self.split = self.wb.get_active_sheet()\
                .set_title('Split Assessments')
            self.group = self.wb.create_sheet('Grouped Assessments')
        else:
            self.split = None
            self.group = self.wb.get_active_sheet().set_title('Assessments')
        self.decoupled = decoupled

        # Cells to be merged
        self.merge_cells = {}
        # Initial titles
        self.lead_titles = [
            'Date of Lead Publication',
            'Imported By',
            'Lead Title',
            'Source',
        ]
        self.titles = [*self.lead_titles]
        self.col_types = {
            0: 'date',
        }
        self._titles_dict = {k: True for k in self.titles}

        self._headers_titles = OrderedDict()
        self._headers_added = False
        self._excel_rows = []
        self._title_headers = []
        self._headers_dict = {}
        self._flats = []
        self._assessments = []

    def to_flattened_key_vals(self, dictdata, parents=[]):
        """
        Convert nested dictionary data to a flat dict with keys and non-dict
        values.
        @dictdata: nested dict structure to be flattened
        @parents: parent titles for the given level of title.
                Order: [immediate parent, next parent, ..., root]
        """
        flat = OrderedDict()  # to preserve order of titles
        for k, v in dictdata.items():
            if isinstance(v, dict):
                # NOTE: this might overwrite the keys in recursive calls
                flat.update(self.to_flattened_key_vals(v, [k, *parents]))
            elif isinstance(v, list):
                # check if list elements are dict or not
                for i in v:
                    if isinstance(i, dict):
                        flat.update(
                            self.to_flattened_key_vals(i, [k, *parents]))
                    else:
                        vals = flat.get(k, {}).get('value', [])
                        vals.append(i)
                        # FIXME: assigning parents is repeated every step
                        flat[k] = {'value': vals, 'parents': parents}
            else:
                # Just add key value
                flat[k] = {'value': v, 'parents': parents}
        return flat

    def add_assessments(self, assessments):
        for a in assessments:
            self.add_assessment(a)
        return self

    def add_assessment(self, assessment):
        jsondata = assessment.to_exportable_json()
        flat = self.to_flattened_key_vals(jsondata)
        self._flats.append(flat)
        self._assessments.append(assessment)

        # update the titles
        for k, v in flat.items():
            parent = v['parents'][-1]
            header_titles = self._headers_titles.get(parent, [])

            if k not in header_titles:
                header_titles.append(k)
            self._headers_titles[parent] = header_titles
            '''
            if not self._titles_dict.get(k):
                self.titles.append(k)
                self._titles_dict[k] = True
            '''
        return self

    def get_titles(self):
        return [
            *self.lead_titles,
            *[y for k, v in self._headers_titles.items() for y in v]
        ]

    def assessments_to_rows(self):
        for index, assessment in enumerate(self._assessments):
            rows = RowsBuilder(self.split, self.group, split=False)
            lead = assessment.lead
            rows.add_value_list([
                format_date(lead.created_at),
                lead.created_by.username,
                lead.title,
                (lead.source and lead.source.title) or lead.source_raw,
            ])
            headers_dict = {}
            flat = self._flats[index]
            for i, t in enumerate(self.get_titles()):
                v = flat.get(t)
                if not v and t not in self.lead_titles:
                    rows.add_value("")
                    self._title_headers.append("")
                    continue
                elif not v:
                    self._title_headers.append("")
                    continue

                v = flat[t]['value']
                val = ', '.join([str(x) for x in v]) if isinstance(
                    v, list) else str(v)
                rows.add_value(val)
                header = flat[t]['parents'][-1]

                if not self._headers_dict.get(header):
                    self._title_headers.append(header.upper())
                    self._headers_dict[header] = True
                else:
                    self.merge_cells[header]['end'] += 1

                if not headers_dict.get(header):
                    self.merge_cells[header] = {'start': i, 'end': i}
                    headers_dict[header] = True
                else:
                    self._title_headers.append("")

            self._excel_rows.append(rows)

    def export(self):
        # Generate rows
        self.assessments_to_rows()

        # add header rows
        headerrows = RowsBuilder(self.split, self.group, split=False)
        headerrows.add_value_list(self._title_headers)
        headerrows.apply()

        self.group.append([self.get_titles()])

        if self.decoupled and self.split:
            self.split.auto_fit_cells_in_row(1)
        self.group.auto_fit_cells_in_row(1)

        # add rows
        for rows in self._excel_rows:
            rows.apply()

        # merge cells
        if self.merge_cells:
            sheet = self.wb.wb.active
            for k, v in self.merge_cells.items():
                sheet.merge_cells(start_row=1,
                                  start_column=v['start'] + 1,
                                  end_row=1,
                                  end_column=v['end'] + 1)
                cell = sheet.cell(row=1, column=v['start'] + 1)
                cell.alignment = Alignment(horizontal='center')

        self.group.set_col_types(self.col_types)
        if self.split:
            self.split.set_col_types(self.col_types)

        buffer = self.wb.save()
        return ContentFile(buffer)
Example 4
class NewExcelExporter:
    def __init__(self, sheets_data):
        """
        sheets_data = {
            sheet1: {
                grouped_col: [
                    { col1: val1, col2: val2, col3: val3 },
                    { col1: val1, col2: val2, col3: val3 },
                    ...
                ],
                ungrouped_col: [
                    val1,
                    val2,
                    ...
                ],
            },
            sheet2: {
                ...
            }
        }
        """
        self.wb = WorkBook()
        self.sheets_data = sheets_data or {}
        self.wb_sheets = {}
        self.sheet_headers = {}

        # TODO: validate all data in each col, to check that each item has the
        # same structure (keys)
        for sheet, sheet_data in sheets_data.items():
            self.wb_sheets[sheet] = self.wb.create_sheet(
                underscore_to_title(sheet))
            self.sheet_headers[sheet] = {}
            for col, data in sheet_data.items():
                if data and isinstance(data[0], dict):
                    self.sheet_headers[sheet][col] = data[0].keys()
                else:
                    self.sheet_headers[sheet][col] = []

    def add_headers(self):
        for sheet, headerinfo in self.sheet_headers.items():
            header_row = []
            sub_header_row = []
            for header, info in headerinfo.items():
                header_row.append(underscore_to_title(str(header)))
                # Also add empty cells to account for sub headers
                if info:
                    header_row.extend([""] * (len(info) - 1))
                    sub_header_row.extend(
                        [underscore_to_title(x) for x in info])
                else:
                    sub_header_row.append("")

            # Append header rows to sheet
            self.wb_sheets[sheet].append([header_row, sub_header_row])
            # Merge/style Headers
            counter = 1
            for header, info in headerinfo.items():
                wb_sheet = self.wb_sheets[sheet].ws
                start_column = counter
                if info:
                    wb_sheet.merge_cells(start_row=1,
                                         start_column=counter,
                                         end_row=1,
                                         end_column=counter + len(info) - 1)
                    counter += len(info)
                else:
                    counter += 1
                # Style the header's own (merged) cell, not the cell after it
                cell = wb_sheet.cell(row=1, column=start_column)
                cell.alignment = Alignment(horizontal='center')
                cell.font = Font(bold=True)
            # Style sub headers
            for i, header in enumerate(sub_header_row):
                cell = wb_sheet.cell(row=2, column=i + 1)
                cell.font = Font(bold=True)
            self.wb_sheets[sheet].auto_fit_cells_in_row(1)
            self.wb_sheets[sheet].auto_fit_cells_in_row(2)

    def add_data_rows_to_sheets(self):
        for sheet, sheet_data in self.sheets_data.items():
            zipped = zip(*sheet_data.values())
            for row_data in zipped:
                row = []
                for each in row_data:
                    rowdata = each.values() if isinstance(each, dict) else [each]
                    row.extend(rowdata)
                self.wb_sheets[sheet].append([row])

    def export(self):
        # Write column headers first
        self.add_headers()

        # Add data rows
        self.add_data_rows_to_sheets()

        # Remove default sheet only if other sheets present
        if self.wb_sheets:
            self.wb.wb.remove(self.wb.wb.get_sheet_by_name('Sheet'))

        buffer = self.wb.save()
        return ContentFile(buffer)
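
A minimal usage sketch for NewExcelExporter, with hypothetical sheets_data (not from the source):

    sheets_data = {
        'summary': {
            'location': [{'country': 'A', 'district': 'B'}],
            'total_entries': [10],
        },
    }
    excel_file = NewExcelExporter(sheets_data).export()  # ContentFile with the workbook bytes
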
Example 5
class ExcelExporter:
    def __init__(self, decoupled=True):
        self.wb = WorkBook()

        # Create two worksheets
        if decoupled:
            self.split = self.wb.get_active_sheet()\
                .set_title('Split Assessments')
            self.group = self.wb.create_sheet('Grouped Assessments')
        else:
            self.split = None
            self.group = self.wb.get_active_sheet().set_title('Assessments')
        self.decoupled = decoupled

        # Cells to be merged
        self.merge_cells = {}
        # Initial titles
        self.titles = [
            'Date of Lead Publication',
            'Imported By',
            'Lead Title',
            'Source',
        ]
        self._titles_dict = {k: True for k in self.titles}

    def to_flattened_key_vals(self, dictdata, parents=[]):
        """
        Convert nested dictionary data to a flat dict with keys and non-dict
        values.
        @dictdata: nested dict structure to be flattened
        @parents: parent titles for the given level of title.
                Order: [immediate parent, next parent, ..., root]
        """
        flat = OrderedDict()  # to preserve order of titles
        for k, v in dictdata.items():
            if isinstance(v, dict):
                # NOTE: this might overwrite the keys in recursive calls
                flat.update(self.to_flattened_key_vals(v, [k, *parents]))
            elif isinstance(v, list):
                # check if list elements are dict or not
                for i in v:
                    if isinstance(i, dict):
                        flat.update(
                            self.to_flattened_key_vals(i, [k, *parents])
                        )
                    else:
                        vals = flat.get(k, {}).get('value', [])
                        vals.append(i)
                        # FIXME: assigning parents is repeated every step
                        flat[k] = {'value': vals, 'parents': parents}
            else:
                # Just add key value
                flat[k] = {'value': v, 'parents': parents}
        return flat

    def add_assessments(self, assessments):
        for a in assessments:
            self.add_assessment(a)
        return self

    def add_assessment(self, assessment):
        jsondata = assessment.to_exportable_json()
        flat = self.to_flattened_key_vals(jsondata)
        rows = RowsBuilder(self.split, self.group, split=False)
        rows.add_value_list([
            format_date(assessment.lead.created_at),
            assessment.lead.created_by.username,
            assessment.lead.title,
            assessment.lead.source
        ])
        # update the titles
        for k, v in flat.items():
            if not self._titles_dict.get(k):
                self.titles.append(k)
                self._titles_dict[k] = True

        # create headers
        self.merge_cells = {}  # to store which cells to merge
        headers_dict = {}
        title_headers = []
        for i, t in enumerate(self.titles):
            v = flat.get(t)
            if v:
                v = flat[t]['value']
                val = ', '.join(v) if isinstance(v, list) else str(v)
                rows.add_value(val)
                header = flat[t]['parents'][-1]
                if not headers_dict.get(header):
                    self.merge_cells[header] = {'start': i, 'end': i}
                    title_headers.append(header.upper())
                    headers_dict[header] = True
                else:
                    self.merge_cells[header]['end'] += 1
                    title_headers.append("")
            else:
                title_headers.append("")

        headerrows = RowsBuilder(self.split, self.group, split=False)
        headerrows.add_value_list(title_headers)
        headerrows.apply()
        self.group.append([self.titles])

        if self.decoupled and self.split:
            self.split.auto_fit_cells_in_row(1)
        self.group.auto_fit_cells_in_row(1)

        rows.apply()
        return self

    def export(self, export_entity):
        # merge cells
        if self.merge_cells:
            sheet = self.wb.wb.active
            for k, v in self.merge_cells.items():
                sheet.merge_cells(
                    start_row=1, start_column=v['start'] + 1,
                    end_row=1, end_column=v['end'] + 1
                )
                cell = sheet.cell(row=1, column=v['start'] + 1)
                cell.alignment = Alignment(horizontal='center')

        buffer = self.wb.save()
        filename = generate_filename('Assessments Export', 'xlsx')

        export_entity.title = filename
        export_entity.type = Export.ASSESSMENTS
        export_entity.format = 'xlsx'
        export_entity.pending = False
        export_entity.mime_type = EXCEL_MIME_TYPE

        export_entity.file.save(filename, ContentFile(buffer))
        export_entity.save()