class ExcelExporter:
    """
    Export analytical statement entries into a single-sheet workbook.

    One row is written per statement entry, linking the analysis pillar,
    the statement and the underlying entry/lead.
    """

    def __init__(self, analytical_statement_entries):
        self.wb = WorkBook()
        self.split = None
        # Single sheet: rename the workbook's default active sheet.
        self.analysis_sheet = self.wb.get_active_sheet().set_title('Analysis')
        # Column headers, in the exact order rows are written below.
        self.titles = [
            'Analysis Pillar ID',
            'Analysis Pillar',
            'Assignee',
            'Statement ID',
            'Statement',
            'Entry ID',
            'Entry',
            'Entry Link',
            'Source Link',
        ]

    def add_analytical_statement_entries(self, analytical_statement_entries):
        """Append the header row, then one row per statement entry. Returns self."""
        self.analysis_sheet.append([self.titles])
        # FIXME: Use values only instead of fetching everything.
        queryset = analytical_statement_entries.select_related(
            'entry',
            'entry__lead',
            'analytical_statement',
            'analytical_statement__analysis_pillar',
            'analytical_statement__analysis_pillar__assignee',
        )
        for stmt_entry in queryset.iterator():
            entry = stmt_entry.entry
            lead = entry.lead
            statement = stmt_entry.analytical_statement
            pillar = statement.analysis_pillar
            entry_permalink_url = Permalink().entry(
                entry.project_id,
                lead.id,
                entry.id,
            )
            row = [
                pillar.id,
                pillar.title,
                pillar.assignee.get_display_name(),
                statement.pk,
                statement.statement,
                entry.id,
                entry.excerpt,
                entry_permalink_url,
                lead.url,
            ]
            self.analysis_sheet.append([row])
        return self

    def export(self):
        """Serialize the workbook and return it wrapped in a ContentFile."""
        return ContentFile(self.wb.save())
class ExcelExporter:
    """
    Export entries into an excel workbook.

    Sheets produced:
      - 'Split Entries' + 'Grouped Entries' when ``decoupled`` is True,
        otherwise a single 'Entries' sheet,
      - 'Entry Groups' (lead/group label matrix),
      - 'Bibliography',
      - plus one sheet per tabular field's lead/sheet (see get_data_series).
    """

    def __init__(self, entries, decoupled=True, project_id=None, is_preview=False):
        # entries: Entry queryset — used here only to probe whether any entry
        #   has a modified excerpt (which changes the excerpt columns).
        # project_id: scopes the Lead/label/group lookups below.
        # is_preview: when True, add_entries exports only a limited slice.
        self.is_preview = is_preview
        self.wb = WorkBook()
        # XXX: Limit memory usage? (Or use redis?)
        # Cache: geo area id -> already-built admin-level row values.
        self.geoarea_data_cache = {}
        # Create worksheets(Main, Grouped, Entry Groups, Bibliography)
        if decoupled:
            self.split = self.wb.get_active_sheet()\
                .set_title('Split Entries')
            self.group = self.wb.create_sheet('Grouped Entries')
        else:
            self.split = None
            self.group = self.wb.get_active_sheet().set_title('Entries')
        self.entry_groups_sheet = self.wb.create_sheet('Entry Groups')
        self.decoupled = decoupled
        self.bibliography_sheet = self.wb.create_sheet('Bibliography')
        # lead_id -> (author, source, published_on, url, title); filled by add_entries.
        self.bibliography_data = {}
        # NOTE: attribute name carries a typo ('exceprt'); kept because it is
        # referenced throughout this class.
        self.modified_exceprt_exists = entries.filter(excerpt_modified=True).exists()
        # Initial titles
        self.titles = [
            'Date of Lead Publication',
            'Imported By',
            'Date Imported',
            'Verification Status',
            'Lead Id',
            'Lead Title',
            'Lead URL',
            'Authoring Organizations Type',
            'Author',
            'Source',
            'Lead Priority',
            'Assignee',
            'Entry Id',
            'Lead-Entry Id',
            # Two excerpt columns when any excerpt was edited, else one.
            *(
                [
                    'Modified Excerpt',
                    'Original Excerpt',
                ] if self.modified_exceprt_exists else ['Excerpt']
            )
        ]
        self.lead_id_titles_map = {x.id: x.title for x in Lead.objects.filter(project_id=project_id)}
        project_entry_labels = ProjectEntryLabel.objects.filter(
            project_id=project_id
        ).order_by('order')
        self.label_id_title_map = {x.id: x.title for x in project_entry_labels}
        lead_groups = LeadEntryGroup.objects.filter(lead__project_id=project_id).order_by('order')
        self.group_id_title_map = {x.id: x.title for x in lead_groups}
        # Create matrix of labels and groups
        # (lead_id, group_id) -> {label_id: None}; cells are later filled with
        # hyperlinks back to the entries sheet by add_entries.
        self.group_label_matrix = {
            (group.lead_id, group.id): {x.id: None for x in project_entry_labels}
            for group in lead_groups
        }
        self.entry_group_titles = [
            'Lead',
            'Group',
            *self.label_id_title_map.values(),
        ]
        self.entry_groups_sheet.append([self.entry_group_titles])
        # Column index -> cell type hint applied at export time.
        self.col_types = {
            0: 'date',
            2: 'date',
        }
        # Keep track of sheet data present
        '''
        tabular_sheets = {
            'leadtitle-sheettitle': {
                'field1_title': col_num_in_sheet,
                'field2_title': col_num_in_sheet,
            }
        }
        '''
        self.tabular_sheets = {}
        # Keep track of tabular fields
        self.tabular_fields = {}
        # region_id -> [{'id': admin_level_id, 'geo_area_titles': {...}}, ...]
        self.region_data = {}
        # mapping of original name vs truncated name
        self._sheets = {}

    def load_exportable_titles(self, data, regions):
        """
        Recursively append column titles (and col_types) for one exportable's
        excel config. ``data`` is the exportable's ``data['excel']`` dict.
        """
        export_type = data.get('type')
        col_type = data.get('col_type')
        if export_type == 'nested':
            children = data.get('children')
            for child in children:
                self.load_exportable_titles(child, regions)
        elif export_type == 'geo' and regions:
            self.region_data = {}
            for region in regions:
                admin_levels = region.adminlevel_set.all()
                admin_level_data = []
                self.titles.append(f'{region.title} Polygons')
                for admin_level in admin_levels:
                    self.titles.append(admin_level.title)
                    self.titles.append('{} (code)'.format(admin_level.title))
                    # Collect geo area names for each admin level
                    admin_level_data.append({
                        'id': admin_level.id,
                        'geo_area_titles': admin_level.get_geo_area_titles(),
                    })
                self.region_data[region.id] = admin_level_data
        elif export_type == 'multiple':
            index = len(self.titles)
            self.titles.extend(data.get('titles'))
            if col_type:
                # col_type here is a parallel list, one per added title.
                for i in range(index, len(self.titles)):
                    self.col_types[i] = col_type[i - index]
        elif data.get('title'):
            index = len(self.titles)
            self.titles.append(data.get('title'))
            if col_type:
                self.col_types[index] = col_type

    def load_exportables(self, exportables, regions=None):
        """Collect titles for all excel-capable exportables and write header rows."""
        # Take all exportables that contains excel info
        exportables = exportables.filter(
            data__excel__isnull=False,
        )
        # information_date_index = 1
        for exportable in exportables:
            # For each exportable, create titles according to type
            # and data
            data = exportable.data.get('excel')
            self.load_exportable_titles(data, regions)
        if self.decoupled and self.split:
            self.split.append([self.titles])
        self.group.append([self.titles])
        if self.decoupled and self.split:
            self.split.auto_fit_cells_in_row(1)
        self.group.auto_fit_cells_in_row(1)
        self.exportables = exportables
        self.regions = regions
        return self

    def add_entries_from_excel_data(self, rows, data, export_data):
        """
        Write one entry's values for one exportable into ``rows``
        (a RowsBuilder), mirroring the column layout produced by
        load_exportable_titles for the same ``data``.
        """
        export_type = data.get('type')
        if export_type == 'nested':
            children = data.get('children')
            for i, child in enumerate(children):
                # export_data for 'nested' is expected to be a parallel list;
                # missing/short data falls back to None (empty cells).
                if export_data is None or i >= len(export_data):
                    _export_data = None
                else:
                    _export_data = export_data[i]
                self.add_entries_from_excel_data(
                    rows,
                    child,
                    _export_data,
                )
        elif export_type == 'multiple':
            col_span = len(data.get('titles'))
            if export_data:
                if export_data.get('type') == 'lists':
                    export_data_values = export_data.get('values')
                    rows_of_value_lists = []
                    for export_data_value in export_data_values:
                        # Handle for Matrix2D subsectors
                        # eg: ['dimension', 'subdimension', 'sector', ['sub-sector1', 'sub-sector2']]
                        # -> ['dimension', 'subdimension', 'sector', 'sub-sector1']
                        # -> ['dimension', 'subdimension', 'sector', 'sub-sector2']
                        if len(export_data_value) == 4 and isinstance(export_data_value[3], list):
                            if len(export_data_value[3]) > 0:
                                for subsector in export_data_value[3]:
                                    rows_of_value_lists.append(export_data_value[:3] + [subsector])
                            else:
                                rows_of_value_lists.append(export_data_value[:3] + [''])
                        elif len(export_data_value) != len(data.get('titles')):
                            # Pad or truncate so every row matches the title count.
                            titles_len = len(data.get('titles'))
                            values_len = len(export_data_value)
                            if titles_len > values_len:
                                # Add additional empty cells
                                rows_of_value_lists.append(export_data_value + [''] * (titles_len - values_len))
                            else:
                                # Remove extra cells
                                rows_of_value_lists.append(export_data_value[:titles_len])
                        else:
                            rows_of_value_lists.append(export_data_value)
                    rows.add_rows_of_value_lists(
                        # Filter if all values are None
                        [
                            x for x in rows_of_value_lists
                            if x is not None and not all(y is None for y in x)
                        ],
                        col_span,
                    )
                else:
                    rows.add_value_list(export_data.get('values'))
            else:
                rows.add_value_list([''] * col_span)
        elif export_type == 'geo' and self.regions:
            geo_id_values = []
            region_geo_polygons = {}
            if export_data:
                geo_id_values = [str(v) for v in export_data.get('values') or []]
                for geo_polygon in export_data.get('polygons') or []:
                    region_id = geo_polygon['region_id']
                    region_geo_polygons[region_id] = region_geo_polygons.get(region_id) or []
                    region_geo_polygons[region_id].append(geo_polygon['title'])
            for region in self.regions:
                admin_levels = self.region_data[region.id]
                geo_polygons = region_geo_polygons.get(region.id, [])
                max_levels = len(admin_levels)
                rows_value = []
                rows.add_rows_of_values(geo_polygons)
                # Walk admin levels deepest-first; for each selected geo area,
                # build the (code, title) chain up through its parents.
                for rev_level, admin_level in enumerate(admin_levels[::-1]):
                    geo_area_titles = admin_level['geo_area_titles']
                    level = max_levels - rev_level
                    for geo_id in geo_id_values:
                        if geo_id not in geo_area_titles:
                            continue
                        if geo_id in self.geoarea_data_cache:
                            rows_value.append(self.geoarea_data_cache[geo_id])
                            continue
                        # Leading blanks so the row spans all admin-level columns
                        # (two cells — code and title — per level).
                        row_values = ['' for i in range(0, max_levels - level)] * 2
                        title = geo_area_titles[geo_id].get('title', '')
                        code = geo_area_titles[geo_id].get('code', '')
                        parent_id = geo_area_titles[geo_id].get('parent_id')
                        row_values.extend([code, title])
                        for _level in range(0, level - 1)[::-1]:
                            if parent_id:
                                _geo_area_titles = admin_levels[_level]['geo_area_titles']
                                _geo_area = _geo_area_titles.get(parent_id) or {}
                                _title = _geo_area.get('title', '')
                                _code = _geo_area.get('code', '')
                                parent_id = _geo_area.get('parent_id')
                                row_values.extend([_code, _title])
                            else:
                                row_values.extend(['', ''])
                        # Reversed so columns run from topmost level downward.
                        rows_value.append(row_values[::-1])
                        self.geoarea_data_cache[geo_id] = row_values[::-1]
                if len(rows_value) > 0:
                    rows.add_rows_of_value_lists(rows_value)
                else:
                    rows.add_rows_of_value_lists([['' for i in range(0, max_levels)] * 2])
        else:
            if export_data:
                if export_data.get('type') == 'list':
                    row_values = [
                        # This is in hope of filtering out non-existent data from excel row
                        x for x in export_data.get('value', []) if x is not None
                    ]
                    rows.add_rows_of_values(row_values if row_values else [''])
                else:
                    rows.add_value(export_data.get('value'))
            else:
                rows.add_value('')

    def get_data_series(self, entry):
        """
        Write the entry's tabular-field column into its own per-lead/sheet
        worksheet and return a hyperlink to that column (or '' if the entry
        has no tabular field).
        """
        lead = entry.lead
        field = entry.tabular_field
        if field is None:
            return ''
        self.tabular_fields[field.id] = field
        # Get Sheet title which is Lead title - Sheet title
        # Worksheet title is limited to 31 as excel's tab length is capped to 31
        worksheet_title = '{}-{}'.format(lead.title, field.sheet.title)
        if not self._sheets.get(worksheet_title) and len(worksheet_title) > 31:
            # Truncate and suffix with the current sheet count to keep names unique.
            self._sheets[worksheet_title] = '{}-{}'.format(
                worksheet_title[:28],
                len(self.wb.wb.worksheets)
            )
        elif not self._sheets.get(worksheet_title):
            self._sheets[worksheet_title] = worksheet_title
        worksheet_title = self._sheets[worksheet_title]
        if worksheet_title not in self.wb.wb.sheetnames:
            tabular_sheet = self.wb.create_sheet(worksheet_title).ws
        else:
            tabular_sheet = self.wb.wb.get_sheet_by_name(worksheet_title)
        # Get fields data
        worksheet_data = self.tabular_sheets.get(worksheet_title, {})
        col_number = worksheet_data.get(field.title)
        if col_number is None:
            # col_number None means we don't have the field in the work sheet
            # So, we create one assigning next number to the field
            cols_count = len(worksheet_data.keys())
            col_number = cols_count + 1
            worksheet_data[field.title] = col_number
            # Now add data to the column
            # excel_column_name converts number to excel column names: 1 -> A..
            sheet_col_name = excel_column_name(col_number)
            self.tabular_sheets[worksheet_title] = worksheet_data
            # Insert field title to sheet in first row
            tabular_sheet['{}1'.format(sheet_col_name)].value =\
                field.title
            # Add field values to corresponding column
            for i, x in enumerate(field.actual_data):
                tabular_sheet[
                    '{}{}'.format(sheet_col_name, 2 + i)
                ].value = x.get('processed_value') or x['value']
        else:
            sheet_col_name = excel_column_name(col_number)
        link = f'#\'{worksheet_title}\'!{sheet_col_name}1'
        return get_hyperlink(link, field.title)

    def get_entry_data(self, entry):
        """Return the cell value for an entry's excerpt column by entry type."""
        if entry.entry_type == Entry.TagType.EXCERPT:
            return entry.excerpt
        if entry.entry_type == Entry.TagType.IMAGE:
            return entry.get_image_url()
        if entry.entry_type == Entry.TagType.DATA_SERIES:
            try:
                return self.get_data_series(entry)
            except Exception:
                # Best-effort: a broken tabular field must not abort the export.
                logger.error(
                    'Data Series EXCEL Export Failed for entry',
                    exc_info=1,
                    extra={'data': {'entry_id': entry.pk}},
                )
        return ''

    def add_entries(self, entries):
        """Write one row per entry (limited slice when is_preview), then the group matrix."""
        iterable_entries = entries[:Export.PREVIEW_ENTRY_SIZE] if self.is_preview else entries
        for i, entry in enumerate(iterable_entries):
            # Export each entry
            # Start building rows and export data for each exportable

            # ENTRY GROUP
            # Add it to appropriate row/column in self.group_label_matrix
            for group_label in entry.entrygrouplabel_set.all():
                key = (group_label.group.lead_id, group_label.group_id)
                entries_sheet_name = 'Grouped Entries' if self.decoupled else 'Entries'
                # i+2: row 1 is the header, data rows start at 2.
                link = f'#\'{entries_sheet_name}\'!A{i+2}'
                self.group_label_matrix[key][group_label.label_id] = get_hyperlink(link, entry.excerpt[:50])
            lead = entry.lead
            assignee = entry.lead.get_assignee()
            author = lead.get_authors_display()
            source = lead.get_source_display()
            published_on = (lead.published_on and lead.published_on.strftime(EXPORT_DATE_FORMAT)) or ''
            url = lead.url
            self.bibliography_data[lead.id] = (author, source, published_on, url, lead.title)
            rows = RowsBuilder(self.split, self.group, self.decoupled)
            rows.add_value(format_date(lead.published_on))
            rows.add_value_list([
                entry.created_by and entry.created_by.profile.get_display_name(),
                format_date(entry.created_at.date()),
                'Controlled' if entry.controlled else 'Uncontrolled',
                f'{lead.id}',
                lead.title,
                lead.url or lead.generate_client_url(),
                lead.get_authoring_organizations_type_display(),
                lead.get_authors_display(),
                lead.get_source_display(),
                lead.get_priority_display(),
                assignee and assignee.profile.get_display_name(),
                f'{entry.id}',
                f'{lead.id}-{entry.id}',
                *(
                    [
                        self.get_entry_data(entry),
                        entry.dropped_excerpt,
                    ] if self.modified_exceprt_exists else [self.get_entry_data(entry)]
                )
            ])
            for exportable in self.exportables:
                # Get export data for this entry corresponding to this
                # exportable.
                # And write some value based on type and data
                # or empty strings if no data.
                data = exportable.data.get('excel')
                export_data = ExportData.objects.filter(
                    exportable=exportable,
                    entry=entry,
                    data__excel__isnull=False,
                ).first()
                # TODO: handle for conditional widget
                if export_data and type(export_data.data.get('excel', {})) == list:
                    export_data = export_data.data.get('excel', [])
                else:
                    # Merge 'common' and 'excel' payloads; 'excel' keys win.
                    export_data = export_data and {
                        **export_data.data.get('common', {}),
                        **export_data.data.get('excel', {})
                    }
                self.add_entries_from_excel_data(rows, data, export_data)
            rows.apply()
        # Now add data to entry group sheet
        for (leadid, gid), labeldata in self.group_label_matrix.items():
            row_data = [
                self.lead_id_titles_map.get(leadid),
                self.group_id_title_map.get(gid),
                *labeldata.values()
            ]
            self.entry_groups_sheet.append([row_data])
        return self

    def add_bibliography_sheet(self):
        """Write the bibliography rows collected during add_entries (URL folded into the title link)."""
        self.bibliography_sheet.append([['Author', 'Source', 'Published Date', 'Title']])
        for author, source, published, url, title in self.bibliography_data.values():
            self.bibliography_sheet.append(
                [[author, source, published, get_hyperlink(url, title) if url else title]]
            )

    def export(self):
        """
        Export and return export data
        """
        self.group.set_col_types(self.col_types)
        if self.split:
            self.split.set_col_types(self.col_types)
        # Add bibliography
        self.add_bibliography_sheet()
        buffer = self.wb.save()
        return ContentFile(buffer)
class ExcelExporter:
    """
    Export entries into a workbook with either a split/grouped sheet pair
    (decoupled) or a single 'Entries' sheet.
    """

    def __init__(self, decoupled=True):
        self.wb = WorkBook()
        # Two layouts: decoupled uses a pair of sheets, otherwise one sheet.
        if decoupled:
            self.split = self.wb.get_active_sheet()\
                .set_title('Split Entries')
            self.group = self.wb.create_sheet('Grouped Entries')
        else:
            self.split = None
            self.group = self.wb.get_active_sheet().set_title('Entries')
        self.decoupled = decoupled
        # Fixed leading columns; exportables extend this in load_exportables.
        self.titles = [
            'Date of Lead Publication',
            'Imported By',
            'Date Imported',
            'Lead Title',
            'Source',
            'Assignee',
            'Excerpt',
        ]

    def load_exportables(self, exportables, regions=None):
        """Append per-exportable column titles and write the header row(s). Returns self."""
        # Only exportables that carry excel configuration matter here.
        exportables = exportables.filter(
            data__excel__isnull=False,
        )
        # information_date_index = 1
        for exportable in exportables:
            excel_config = exportable.data.get('excel')
            config_type = excel_config.get('type')
            # if config_type == 'information-date':
            #     self.titles.insert(
            #         information_date_index,
            #         excel_config.get('title'),
            #     )
            #     information_date_index += 1
            if config_type == 'geo' and regions:
                # One title column per admin level of every region.
                for region in regions:
                    for admin_level in region.adminlevel_set.all():
                        self.titles.append(admin_level.title)
            elif config_type == 'multiple':
                self.titles.extend(excel_config.get('titles'))
            elif excel_config.get('title'):
                self.titles.append(excel_config.get('title'))
        if self.decoupled and self.split:
            self.split.append([self.titles])
        self.group.append([self.titles])
        if self.decoupled and self.split:
            self.split.auto_fit_cells_in_row(1)
        self.group.auto_fit_cells_in_row(1)
        self.exportables = exportables
        self.regions = regions
        return self

    def add_entries(self, entries):
        """Write one row per entry, cell values driven by each exportable's config. Returns self."""
        for entry in entries:
            builder = RowsBuilder(self.split, self.group, self.decoupled)
            builder.add_value(format_date(entry.lead.published_on))
            # TODO Check for information dates
            assignee = entry.lead.get_assignee()
            builder.add_value_list([
                entry.created_by.profile.get_display_name(),
                format_date(entry.created_at.date()),
                entry.lead.title,
                entry.lead.source,
                assignee and assignee.profile.get_display_name(),
                entry.excerpt if entry.entry_type == Entry.EXCERPT else 'IMAGE',
            ])
            for exportable in self.exportables:
                # Fetch this entry's data for this exportable; fall back to
                # blanks when nothing is stored.
                excel_config = exportable.data.get('excel')
                stored = ExportData.objects.filter(
                    exportable=exportable,
                    entry=entry,
                    data__excel__isnull=False,
                ).first()
                excel_values = stored and stored.data.get('excel')
                config_type = excel_config.get('type')
                if config_type == 'multiple':
                    span = len(excel_config.get('titles'))
                    if not excel_values:
                        builder.add_value_list([''] * span)
                    elif excel_values.get('type') == 'lists':
                        builder.add_rows_of_value_lists(
                            excel_values.get('values'),
                            span,
                        )
                    else:
                        builder.add_value_list(
                            excel_values.get('values'),
                        )
                elif config_type == 'geo' and self.regions:
                    selected_ids = []
                    if excel_values:
                        selected_ids = [int(v) for v in excel_values.get('values', [])]
                    for region in self.regions:
                        for admin_level in region.adminlevel_set.all():
                            matched = GeoArea.objects.filter(
                                admin_level=admin_level,
                                id__in=selected_ids,
                            ).distinct()
                            if matched.count() > 0:
                                builder.add_rows_of_values(
                                    [area.title for area in matched]
                                )
                            else:
                                builder.add_value('')
                else:
                    if not excel_values:
                        builder.add_value('')
                    elif excel_values.get('type') == 'list':
                        builder.add_rows_of_values(excel_values.get('value'))
                    else:
                        builder.add_value(excel_values.get('value'))
            builder.apply()
        return self

    def export(self, export_entity):
        """Serialize the workbook and persist it onto the given export entity."""
        buffer = self.wb.save()
        filename = generate_filename('Entries Export', 'xlsx')
        export_entity.title = filename
        export_entity.type = 'entries'
        export_entity.format = 'xlsx'
        export_entity.pending = False
        export_entity.mime_type = EXCEL_MIME_TYPE
        export_entity.file.save(filename, ContentFile(buffer))
        export_entity.save()
class NewExcelExporter:
    """
    Export pre-shaped sheet data into a workbook.

    Grouped columns (lists of dicts) get a merged top header with one
    sub-header per dict key; ungrouped columns (lists of scalars) get a
    single header cell with an empty sub-header.
    """

    def __init__(self, sheets_data):
        """
        sheets_data = {
            sheet1: {
                grouped_col: [
                    {
                        col1: val1, col2: val2, col3: val3
                    },
                    {
                        col1: val1, col2: val2, col3: val3
                    },
                    ...
                ],
                ungrouped_col: [
                    val1, val2, ...
                ],
            },
            sheet2: {
                ...
            }
        }
        """
        self.wb = WorkBook()
        self.sheets_data = sheets_data or {}
        self.wb_sheets = {}
        self.sheet_headers = {}
        # TODO: validate all data in each col, to see if each of them have same
        # structure(keys)
        # NOTE: iterate the normalized dict so a None argument doesn't crash.
        for sheet, sheet_data in self.sheets_data.items():
            self.wb_sheets[sheet] = self.wb.create_sheet(
                underscore_to_title(sheet))
            self.sheet_headers[sheet] = {}
            for col, data in sheet_data.items():
                # A column whose first element is a dict is "grouped": its
                # keys become sub-headers under the column's top header.
                if data and isinstance(data[0], dict):
                    self.sheet_headers[sheet][col] = data[0].keys()
                else:
                    self.sheet_headers[sheet][col] = []

    def add_headers(self):
        """Write the two header rows (top headers + sub-headers), merging and styling grouped headers."""
        for sheet, headerinfo in self.sheet_headers.items():
            header_row = []
            sub_header_row = []
            for header, info in headerinfo.items():
                header_row.append(underscore_to_title(str(header)))
                # Also add empty cells to account for sub headers
                if info:
                    header_row.extend([""] * (len(info) - 1))
                    sub_header_row.extend(
                        [underscore_to_title(x) for x in info])
                else:
                    sub_header_row.append("")
            # Append header rows to sheet
            self.wb_sheets[sheet].append([header_row, sub_header_row])

            # Merge/style Headers
            wb_sheet = self.wb_sheets[sheet].ws
            counter = 1
            for header, info in headerinfo.items():
                # Remember where this header starts before advancing counter —
                # styling must target the header's own first cell, not the
                # cell after the group (which the previous version did).
                start_column = counter
                if info:
                    wb_sheet.merge_cells(start_row=1,
                                         start_column=counter,
                                         end_row=1,
                                         end_column=counter + len(info) - 1)
                    counter += len(info)
                else:
                    counter += 1
                # Styling
                cell = wb_sheet.cell(row=1, column=start_column)
                cell.alignment = Alignment(horizontal='center')
                cell.font = Font(bold=True)
            # Style sub headers
            for i, header in enumerate(sub_header_row):
                cell = wb_sheet.cell(row=2, column=i + 1)
                cell.font = Font(bold=True)
            self.wb_sheets[sheet].auto_fit_cells_in_row(1)
            self.wb_sheets[sheet].auto_fit_cells_in_row(2)

    def add_data_rows_to_sheets(self):
        """Write data rows; grouped columns expand to one cell per dict value.

        NOTE: zip() stops at the shortest column, so ragged columns are
        truncated to the common length.
        """
        for sheet, sheet_data in self.sheets_data.items():
            zipped = zip(*sheet_data.values())
            for row_data in zipped:
                row = []
                for each in row_data:
                    rowdata = each.values() if isinstance(each, dict) else [each]
                    row.extend(rowdata)
                self.wb_sheets[sheet].append([row])

    def export(self):
        """Write headers and data, drop the default sheet, and return the workbook as a ContentFile."""
        # Write cols header first
        self.add_headers()
        # Add data rows
        self.add_data_rows_to_sheets()
        # Remove default sheet only if other sheets present
        if self.wb_sheets:
            self.wb.wb.remove(self.wb.wb.get_sheet_by_name('Sheet'))
        buffer = self.wb.save()
        return ContentFile(buffer)
class ExcelExporter:
    """
    NOTE: Legacy exporter (Not used)
    """
    def __init__(self, decoupled=True):
        self.wb = WorkBook()
        # Create two worksheets
        if decoupled:
            self.split = self.wb.get_active_sheet()\
                .set_title('Split Assessments')
            self.group = self.wb.create_sheet('Grouped Assessments')
        else:
            self.split = None
            self.group = self.wb.get_active_sheet().set_title('Assessments')
        self.decoupled = decoupled
        # Cells to be merged
        self.merge_cells = {}
        # Initial titles
        self.lead_titles = [
            'Date of Lead Publication',
            'Imported By',
            'Lead Title',
            'Source',
        ]
        self.titles = [*self.lead_titles]
        self.col_types = {
            0: 'date',
        }
        self._titles_dict = {k: True for k in self.titles}
        # parent header -> ordered list of flattened keys under it.
        self._headers_titles = OrderedDict()
        self._headers_added = False
        self._excel_rows = []
        self._title_headers = []
        self._headers_dict = {}
        # Parallel lists: _flats[i] is the flattened data for _assessments[i].
        self._flats = []
        self._assessments = []

    def to_flattened_key_vals(self, dictdata, parents=[]):
        """
        Convert nested dictionary data to flat dict with keys and nondict values.
        @dictdata: nested dict structrure to be flattened
        @parents: parent titles for given level of title.
            Order [immediate parent, next parent, ... root]
        """
        flat = OrderedDict()  # to preserve order of titles
        for k, v in dictdata.items():
            if isinstance(v, dict):
                # NOTE: this might override the keys in recursive calls
                flat.update(self.to_flattened_key_vals(v, [k, *parents]))
            elif isinstance(v, list):
                # check if list elements are dict or not
                for i in v:
                    if isinstance(i, dict):
                        flat.update(
                            self.to_flattened_key_vals(i, [k, *parents]))
                    else:
                        # Scalars in a list are accumulated under the same key.
                        vals = flat.get(k, {}).get('value', [])
                        vals.append(i)
                        # FIXME: assigning parents is repeated every step
                        flat[k] = {'value': vals, 'parents': parents}
            else:
                # Just add key value
                flat[k] = {'value': v, 'parents': parents}
        return flat

    def add_assessments(self, assessments):
        for a in assessments:
            self.add_assessment(a)
        return self

    def add_assessment(self, assessment):
        """Flatten and buffer one assessment; rows are emitted later by export()."""
        jsondata = assessment.to_exportable_json()
        flat = self.to_flattened_key_vals(jsondata)
        self._flats.append(flat)
        self._assessments.append(assessment)
        # update the titles
        for k, v in flat.items():
            parent = v['parents'][-1]
            header_titles = self._headers_titles.get(parent, [])
            if k not in header_titles:
                header_titles.append(k)
            self._headers_titles[parent] = header_titles
        '''
        if not self._titles_dict.get(k):
            self.titles.append(k)
            self._titles_dict[k] = True
        '''
        return self

    def get_titles(self):
        # Lead columns first, then flattened keys grouped by their root parent.
        return [
            *self.lead_titles,
            *[y for k, v in self._headers_titles.items() for y in v]
        ]

    def assessments_to_rows(self):
        """
        Build a RowsBuilder per buffered assessment and record the header
        merge ranges. NOTE(review): _title_headers and merge_cells 'end'
        accumulate across assessments — looks fragile, but this class is
        marked unused; behavior kept as-is.
        """
        for index, assessment in enumerate(self._assessments):
            rows = RowsBuilder(self.split, self.group, split=False)
            lead = assessment.lead
            rows.add_value_list([
                format_date(lead.created_at),
                lead.created_by.username,
                lead.title,
                (lead.source and lead.source.title) or lead.source_raw,
            ])
            headers_dict = {}
            flat = self._flats[index]
            for i, t in enumerate(self.get_titles()):
                v = flat.get(t)
                if not v and t not in self.lead_titles:
                    # Missing non-lead value: empty cell, empty header slot.
                    rows.add_value("")
                    self._title_headers.append("")
                    continue
                elif not v:
                    # Lead columns were already written by add_value_list above.
                    self._title_headers.append("")
                    continue
                v = flat[t]['value']
                val = ', '.join([str(x) for x in v]) if isinstance(
                    v, list) else str(v)
                rows.add_value(val)
                header = flat[t]['parents'][-1]
                if not self._headers_dict.get(header):
                    self._title_headers.append(header.upper())
                    self._headers_dict[header] = True
                else:
                    self.merge_cells[header]['end'] += 1
                if not headers_dict.get(header):
                    self.merge_cells[header] = {'start': i, 'end': i}
                    headers_dict[header] = True
                else:
                    self._title_headers.append("")
            self._excel_rows.append(rows)

    def export(self):
        """Emit header + data rows, apply merges/types and return a ContentFile."""
        # Generate rows
        self.assessments_to_rows()
        # add header rows
        headerrows = RowsBuilder(self.split, self.group, split=False)
        headerrows.add_value_list(self._title_headers)
        headerrows.apply()
        self.group.append([self.get_titles()])
        if self.decoupled and self.split:
            self.split.auto_fit_cells_in_row(1)
        self.group.auto_fit_cells_in_row(1)
        # add rows
        for rows in self._excel_rows:
            rows.apply()
        # merge cells
        if self.merge_cells:
            sheet = self.wb.wb.active
            for k, v in self.merge_cells.items():
                sheet.merge_cells(start_row=1,
                                  start_column=v['start'] + 1,
                                  end_row=1,
                                  end_column=v['end'] + 1)
                cell = sheet.cell(row=1, column=v['start'] + 1)
                cell.alignment = Alignment(horizontal='center')
        self.group.set_col_types(self.col_types)
        if self.split:
            self.split.set_col_types(self.col_types)
        buffer = self.wb.save()
        return ContentFile(buffer)
class ExcelExporter:
    """
    Export assessments into a workbook: lead columns first, then columns for
    every flattened key of the assessment's exportable JSON, with parent keys
    merged into grouped headers.
    """

    def __init__(self, decoupled=True):
        self.wb = WorkBook()
        # Create two worksheets
        if decoupled:
            self.split = self.wb.get_active_sheet()\
                .set_title('Split Assessments')
            self.group = self.wb.create_sheet('Grouped Assessments')
        else:
            self.split = None
            self.group = self.wb.get_active_sheet().set_title('Assessments')
        self.decoupled = decoupled
        # Cells to be merged
        self.merge_cells = {}
        # Initial titles
        self.titles = [
            'Date of Lead Publication',
            'Imported By',
            'Lead Title',
            'Source',
        ]
        self._titles_dict = {k: True for k in self.titles}

    def to_flattened_key_vals(self, dictdata, parents=[]):
        """
        Convert nested dictionary data to flat dict with keys and nondict values.
        @dictdata: nested dict structrure to be flattened
        @parents: parent titles for given level of title.
            Order [immediate parent, next parent, ... root]
        NOTE: the default list is safe here — `parents` is only read,
        never mutated ([k, *parents] builds a new list each call).
        """
        flat = OrderedDict()  # to preserve order of titles
        for k, v in dictdata.items():
            if isinstance(v, dict):
                # NOTE: this might override the keys in recursive calls
                flat.update(self.to_flattened_key_vals(v, [k, *parents]))
            elif isinstance(v, list):
                # check if list elements are dict or not
                for i in v:
                    if isinstance(i, dict):
                        flat.update(
                            self.to_flattened_key_vals(i, [k, *parents])
                        )
                    else:
                        # Scalars in a list accumulate under the same key.
                        vals = flat.get(k, {}).get('value', [])
                        vals.append(i)
                        # FIXME: assigning parents is repeated every step
                        flat[k] = {'value': vals, 'parents': parents}
            else:
                # Just add key value
                flat[k] = {'value': v, 'parents': parents}
        return flat

    def add_assessments(self, assessments):
        for a in assessments:
            self.add_assessment(a)
        return self

    def add_assessment(self, assessment):
        """Flatten one assessment, write its row and (re)write header rows. Returns self."""
        jsondata = assessment.to_exportable_json()
        flat = self.to_flattened_key_vals(jsondata)
        rows = RowsBuilder(self.split, self.group, split=False)
        rows.add_value_list([
            format_date(assessment.lead.created_at),
            assessment.lead.created_by.username,
            assessment.lead.title,
            assessment.lead.source
        ])
        # update the titles
        for k, v in flat.items():
            if not self._titles_dict.get(k):
                self.titles.append(k)
                self._titles_dict[k] = True
        # create headers
        self.merge_cells = {}  # to store which cells to merge
        headers_dict = {}
        title_headers = []
        for i, t in enumerate(self.titles):
            v = flat.get(t)
            if v:
                v = flat[t]['value']
                # BUGFIX: list values may contain non-strings (ints/floats
                # collected verbatim by to_flattened_key_vals); join over
                # str(x) like the legacy exporter does, instead of ', '.join(v)
                # which raised TypeError on non-string items.
                val = ', '.join([str(x) for x in v]) if isinstance(v, list) else str(v)
                rows.add_value(val)
                header = flat[t]['parents'][-1]
                if not headers_dict.get(header):
                    # First column under this parent: open a merge range.
                    self.merge_cells[header] = {'start': i, 'end': i}
                    title_headers.append(header.upper())
                    headers_dict[header] = True
                else:
                    # Subsequent column: extend the merge range, blank header cell.
                    self.merge_cells[header]['end'] += 1
                    title_headers.append("")
            else:
                title_headers.append("")
        headerrows = RowsBuilder(self.split, self.group, split=False)
        headerrows.add_value_list(title_headers)
        headerrows.apply()
        self.group.append([self.titles])
        if self.decoupled and self.split:
            self.split.auto_fit_cells_in_row(1)
        self.group.auto_fit_cells_in_row(1)
        rows.apply()
        return self

    def export(self, export_entity):
        """Apply header merges, serialize and persist onto the export entity."""
        # merge cells
        if self.merge_cells:
            sheet = self.wb.wb.active
            for k, v in self.merge_cells.items():
                sheet.merge_cells(
                    start_row=1,
                    start_column=v['start'] + 1,
                    end_row=1,
                    end_column=v['end'] + 1
                )
                cell = sheet.cell(row=1, column=v['start'] + 1)
                cell.alignment = Alignment(horizontal='center')
        buffer = self.wb.save()
        filename = generate_filename('Assessments Export', 'xlsx')
        export_entity.title = filename
        export_entity.type = Export.ASSESSMENTS
        export_entity.format = 'xlsx'
        export_entity.pending = False
        export_entity.mime_type = EXCEL_MIME_TYPE
        export_entity.file.save(filename, ContentFile(buffer))
        export_entity.save()