def generate_layout():
    """Assemble the "Portal Totals" section of the page.

    The section consists of, in order: a header carrying an explanatory
    tooltip, a horizontal rule, one LED display per portal total, and a
    container holding the datasets-in-portal pie chart.

    Returns:
        The ``html.Div`` wrapping the whole section.
    """
    ckan_api = CkanApi()

    # (term, explanation) pairs rendered inside the header tooltip.
    glossary = (
        ("Datasets In Portal", " - Total number of datasets in portal."),
        ("Scraped Datasets", " - Total number of scraped datasets."),
        ("Datasets Amended",
         " - Total number of scraped datasets which have been amended by a user."),
        ("Datasets Manually Added",
         " - Total number of datasets manually added by a user."),
    )
    tooltip_children = []
    for position, (term, explanation) in enumerate(glossary):
        if position:
            # Consecutive entries are separated by two line breaks.
            tooltip_children.extend([html.Br(), html.Br()])
        tooltip_children.append(
            html.Span(term, style={'font-weight': 'bold'}))
        tooltip_children.append(html.Span(explanation))

    section_header = header(
        'Portal Totals',
        'portal-totals',
        html.Div(id='portal-totals-tooltip-div', children=tooltip_children),
    )
    divider = html.Hr()

    # LED displays: (callable producing the total, caption under the display).
    led_sources = (
        (ckan_api.total_datasets, "Datasets In Portal"),
        (ckan_api.total_scraped_datasets, "Scraped Datasets"),
        (ckan_api.total_amended_datasets, "Datasets Amended By User"),
        (ckan_api.total_manual_datasets, "Datasets Manually Added"),
    )
    led_panels = [
        led_display(fetch_total(), caption)
        for fetch_total, caption in led_sources
    ]

    # Total datasets in the portal pie chart.
    pie_container = html.Div(
        [datasets_in_portal_pie(ckan_api=ckan_api)],
        style={'vertical-align': 'middle'},
    )

    return html.Div(
        children=[section_header, divider, *led_panels, pie_container])
def __init__(self):
    """Create the CKAN API client and store it on ``self.ckan_api``."""
    api_client = CkanApi()
    self.ckan_api = api_client
class InsightsPage:
    """Dataframes and Dash components for the insights page.

    Portal figures are fetched through ``self.ckan_api``; the ``*_stats``
    totals are read from the metrics Excel workbook located at
    ``PATH_TO_EXCEL_SHEET``.
    """

    # Path to the Excel workbook used for creating the stats dataframes.
    # Left as None when ED_OUTPUT_PATH is unset so that importing this module
    # does not fail; the problem is reported when the workbook is first read.
    _output_root = os.getenv('ED_OUTPUT_PATH')
    PATH_TO_EXCEL_SHEET = (
        None if _output_root is None else
        os.path.join(_output_root, 'tools', 'stats', 'metrics.xlsx'))
    del _output_root

    def __init__(self):
        """Create the CKAN API client used by the ``*_portal_*`` methods."""
        self.ckan_api = CkanApi()

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _get_df_from_excel_sheet(self, sheet_name):
        """Read one sheet of the metrics workbook into a dataframe.

        Args:
            sheet_name: name of the sheet to load.

        Returns:
            The dataframe produced by ``pd.read_excel`` for that sheet.

        Raises:
            RuntimeError: if ``PATH_TO_EXCEL_SHEET`` is None, i.e. the
                ED_OUTPUT_PATH environment variable was not set.
        """
        if self.PATH_TO_EXCEL_SHEET is None:
            raise RuntimeError(
                'ED_OUTPUT_PATH is not set; cannot locate metrics.xlsx')
        return pd.read_excel(self.PATH_TO_EXCEL_SHEET,
                             sheet_name=sheet_name,
                             engine='openpyxl')

    @staticmethod
    def _sorted_count_df(pairs, label_column, count_column):
        """Turn (label, count) pairs into a dataframe sorted by count, descending."""
        rows = [(label, count) for label, count in pairs]
        df = pd.DataFrame(rows, columns=[label_column, count_column])
        return df.sort_values(by=count_column,
                              axis='index',
                              ascending=False,
                              ignore_index=True)

    @staticmethod
    def _with_total_row(df, label_column, count_column):
        """Return ``df`` with a trailing 'Total' row summing ``count_column``."""
        total_row = pd.DataFrame([['Total', df[count_column].sum()]],
                                 columns=[label_column, count_column])
        # DataFrame.append was removed in pandas 2.0; concat works everywhere.
        return pd.concat([df, total_row], ignore_index=True)

    @staticmethod
    def _wrap_label(text):
        """Break ``text`` into lines of at most 20 characters joined by '<br>'."""
        return '<br>'.join(textwrap.wrap(text, width=20))

    @staticmethod
    def _count_table(df, table_id, label_column, label_title, count_column,
                     count_title, overflow_y):
        """Build the two-column (label, count) DataTable shared by all tables."""
        return dash_table.DataTable(
            id=table_id,
            columns=[{
                "id": label_column,
                "name": label_title
            }, {
                "id": count_column,
                "name": count_title
            }],
            data=df.to_dict('records'),
            sort_action='native',
            style_cell={
                'textAlign': 'left',
                'whiteSpace': 'normal'
            },
            style_cell_conditional=[
                {
                    'if': {
                        'column_id': label_column
                    },
                    'width': '70%',
                    'textAlign': 'right'
                },
                {
                    'if': {
                        'column_id': count_column
                    },
                    'width': '30%'
                },
            ],
            style_table={
                'maxHeight': '300px',
                'maxWidth': '100%',
                'overflowY': overflow_y,
                'overflowX': 'hidden',
                'margin': 0,
                'padding': 0,
            },
            style_header={
                'backgroundColor': 'rgb(230, 230, 230)',
                'fontWeight': 'bold',
                'textAlign': 'center',
            })

    def _resource_pie(self, df, label_column, graph_id):
        """Build a pie chart of 'resource count' per ``label_column`` value."""
        # Wrap long labels so they fit inside the slices / legend.
        df[label_column] = [
            self._wrap_label(label) for label in df[label_column]
        ]
        pie_figure = go.Figure(data=[
            go.Pie(labels=df[label_column],
                   values=df['resource count'],
                   title={
                       'font': {
                           'size': 16
                       },
                       'position': 'bottom right'
                   })
        ])
        pie_figure.update_traces(textposition='inside',
                                 textinfo='value+label')
        return dcc.Graph(id=graph_id,
                         figure=pie_figure,
                         config={'modeBarButtonsToRemove': buttonsToRemove})

    # ------------------------------------------------------------------
    # portal dataframes
    # ------------------------------------------------------------------
    def dataset_by_domain_portal_df(self):
        """Return a ('domain', 'page count') dataframe, largest count first."""
        return self._sorted_count_df(self.ckan_api.datasets_by_domain(),
                                     'domain', 'page count')

    def resource_by_domain_portal_df(self):
        """Return a ('domain', 'resource count') dataframe, largest count first."""
        return self._sorted_count_df(self.ckan_api.resources_by_domain(),
                                     'domain', 'resource count')

    def resources_by_publisher_portal_df(self):
        """Return a ('publisher', 'resource count') dataframe, largest count first."""
        return self._sorted_count_df(self.ckan_api.resources_by_publisher(),
                                     'publisher', 'resource count')

    # ------------------------------------------------------------------
    # datasets by domain
    # ------------------------------------------------------------------
    def dataset_by_domain_table(self):
        """Create the DataTable listing the number of pages/datasets per
        domain, followed by a 'Total' row."""
        df = self._with_total_row(self.dataset_by_domain_portal_df(),
                                  'domain', 'page count')
        return self._count_table(df, 'dataset_by_domain_table', 'domain',
                                 'Domain', 'page count', 'Dataset Count',
                                 'auto')

    def dataset_by_domain_bar(self):
        """Create a bar chart of the number of pages/datasets per domain."""
        df = self.dataset_by_domain_portal_df()
        return dcc.Graph(
            id='dataset_by_domain_graph',
            figure={
                'data': [{
                    'x': df['domain'],
                    'y': df['page count'],
                    'type': 'bar'
                }],
                'layout': {}
            },
            config={'modeBarButtonsToRemove': buttonsToRemove})

    # ------------------------------------------------------------------
    # resources by domain
    # ------------------------------------------------------------------
    def resources_by_domain_table(self):
        """Create the DataTable listing the number of resources per domain,
        followed by a 'Total' row."""
        # resource_by_domain_portal_df() already returns the rows sorted by
        # count, so no further sorting is needed here.
        df = self._with_total_row(self.resource_by_domain_portal_df(),
                                  'domain', 'resource count')
        return self._count_table(df, 'resource_by_domain_table', 'domain',
                                 'Domain', 'resource count', 'Resource Count',
                                 'scroll')

    def resources_by_domain_pie(self):
        """Create a pie chart showing the number of resources per domain."""
        return self._resource_pie(self.resource_by_domain_portal_df(),
                                  'domain', 'resources_by_domain_pie')

    # ------------------------------------------------------------------
    # totals read from the metrics workbook
    # ------------------------------------------------------------------
    def total_domains_count_stats(self):
        """Count the non-null 'domain' entries of the
        'RESOURCE COUNT PER DOMAIN' sheet."""
        df = self._get_df_from_excel_sheet('RESOURCE COUNT PER DOMAIN')
        return df['domain'].count()

    def total_page_count_stats(self):
        """Sum the 'page count' column of the 'PAGE COUNT PER DOMAIN' sheet."""
        df = self._get_df_from_excel_sheet('PAGE COUNT PER DOMAIN')
        return df['page count'].sum()

    def total_resource_count_stats(self):
        """Sum the 'resource count' column of the
        'RESOURCE COUNT PER DOMAIN' sheet."""
        df = self._get_df_from_excel_sheet('RESOURCE COUNT PER DOMAIN')
        return df['resource count'].sum()

    def total_datasets_count_stats(self):
        """Sum the 'dataset count' column of the
        'DATASET COUNT PER SCRAPER' sheet."""
        df = self._get_df_from_excel_sheet('DATASET COUNT PER SCRAPER')
        return df['dataset count'].sum()

    # ------------------------------------------------------------------
    # resources by publisher
    # ------------------------------------------------------------------
    def resources_by_publisher_table(self):
        """Create the DataTable listing the number of resources per
        publisher, followed by a 'Total' row."""
        df = self._with_total_row(self.resources_by_publisher_portal_df(),
                                  'publisher', 'resource count')
        return self._count_table(df, 'resource_by_publisher_table',
                                 'publisher', 'Publisher', 'resource count',
                                 'Resource Count', 'scroll')

    def resources_by_publisher_pie(self):
        """Create a pie chart showing the number of resources per publisher."""
        return self._resource_pie(self.resources_by_publisher_portal_df(),
                                  'publisher', 'resources_by_publisher_pie')

    # ------------------------------------------------------------------
    # datasets by publisher
    # ------------------------------------------------------------------
    def dataset_by_office_portal_data(self):
        """Return one ``{'s': name, 'datopian': count}`` row per publisher,
        largest count first, followed by a 'Total' row."""
        pairs = [(name, count)
                 for name, count in self.ckan_api.datasets_by_publisher()]
        rows = [{'s': name, 'datopian': count} for name, count in pairs]
        rows.sort(key=lambda item: item['datopian'], reverse=True)
        rows.append({
            's': 'Total',
            'datopian': sum(count for _, count in pairs)
        })
        return rows

    def get_datasets_bars_portal_data(self):
        """Return one bar-trace dict per publisher, largest count first.

        Each trace has a single bar labelled 'Datasets'; the publisher name,
        wrapped for the legend, is used as the trace name.
        """
        data_list = [{
            'x': ['Datasets'],
            'y': [count],
            'type': 'bar',
            'name': self._wrap_label(name)
        } for name, count in self.ckan_api.datasets_by_publisher()]
        data_list.sort(key=lambda item: item['y'][0], reverse=True)
        return data_list