Exemple #1
0
def dataset_full_path_view(request, group_slug, set_slug):
    """Render the detail page of the dataset found for the given slugs.

    The template context holds the dataset itself, whether the requesting
    user has access, all data links (source links, tables, views and custom
    queries) sorted by name, and the column names of the first source kind
    that is present: tables, then views, then custom queries. ``fields`` is
    ``None`` when the dataset has none of those.
    """
    dataset = find_dataset(group_slug, set_slug)

    source_tables = sorted(
        dataset.sourcetable_set.all(), key=lambda source: source.name
    )
    source_views = dataset.sourceview_set.all()
    custom_queries = dataset.customdatasetquery_set.all()

    fields = None
    if source_tables:
        # Prefix each column with its table so names stay distinguishable
        # across several tables.
        fields = []
        for source_table in source_tables:
            table_columns = datasets_db.get_columns(
                source_table.database.memorable_name,
                schema=source_table.schema,
                table=source_table.table,
            )
            fields.extend(
                f"{source_table.table}.{name}" for name in table_columns
            )
    elif source_views:
        first_view = source_views[0]
        fields = datasets_db.get_columns(
            first_view.database.memorable_name,
            schema=first_view.schema,
            table=first_view.view,
        )
    elif custom_queries:
        first_query = custom_queries[0]
        fields = datasets_db.get_columns(
            first_query.database.memorable_name, query=first_query.query
        )

    has_access = dataset.user_has_access(request.user)
    all_links = chain(
        dataset.sourcelink_set.all(),
        source_tables,
        source_views,
        custom_queries,
    )
    context = {
        'model': dataset,
        'has_access': has_access,
        'data_links': sorted(all_links, key=lambda link: link.name),
        'fields': fields,
    }

    if dataset.type == dataset.TYPE_MASTER_DATASET:
        template_name = 'datasets/master_dataset.html'
    else:
        template_name = 'datasets/data_cut_dataset.html'
    return render(request, template_name, context)
Exemple #2
0
 def get_context_data(self, **kwargs):
     """Add the column names and a random row sample of the swap table.

     The inspected table is named ``<table_name>_<timestamp>_swap``, where
     the timestamp is ``data_flow_execution_date`` formatted with
     ``%Y%m%dt%H%M%S``. ``fields`` receives the result of
     ``datasets_db.get_columns``; ``records`` is a list with one dict per
     sampled row keyed by column, and stays empty when no columns are found.
     """
     ctx = super().get_context_data(**kwargs)
     table = self.get_object()
     # The first key of DATABASES_DATA is used as the database name.
     db_name = list(settings.DATABASES_DATA.items())[0][0]
     table_name = (
         f"{table.table_name}_{table.data_flow_execution_date.strftime('%Y%m%dt%H%M%S_swap')}"
     )
     fields = datasets_db.get_columns(
         db_name, schema=table.schema, table=table_name
     )

     records = []
     if fields:
         # Compose the identifiers instead of interpolating them into the
         # SQL text, so schema/table names are always quoted and escaped.
         # NOTE(review): assumes `sql` is psycopg2's sql module and that
         # get_random_data_sample accepts a Composed query - confirm.
         preview_query = sql.SQL("select * from {}.{}").format(
             sql.Identifier(table.schema),
             sql.Identifier(table_name),
         )
         rows = get_random_data_sample(
             db_name,
             preview_query,
             settings.DATASET_PREVIEW_NUM_OF_ROWS,
         )
         records = [
             {column: row[i] for i, column in enumerate(fields)}
             for row in rows
         ]

     ctx.update({"fields": fields, "records": records})
     return ctx
Exemple #3
0
    def get_context_data(self, **kwargs):
        """Build the preview context for the timestamped swap table.

        The inspected table is named ``<table_name>_<timestamp>_swap``, where
        the timestamp is ``data_flow_execution_date`` formatted with
        ``%Y%m%dt%H%M%S``. The context receives the column list (``fields``),
        one dict per sampled row keyed by column (``records``), the sample
        size (``preview_limit``), the number of sampled rows
        (``record_count``) and two display limits. ``records`` is empty when
        no columns are found.
        """
        table = self.get_object()
        # The first key of DATABASES_DATA is used as the database name.
        db_name = list(settings.DATABASES_DATA.items())[0][0]
        table_name = (
            f"{table.table_name}_{table.data_flow_execution_date.strftime('%Y%m%dt%H%M%S_swap')}"
        )
        schema_name = table.schema
        columns = datasets_db.get_columns(db_name, schema=schema_name, table=table_name)
        query = f"""
            select * from "{schema_name}"."{table_name}"
        """
        sample_size = settings.DATASET_PREVIEW_NUM_OF_ROWS

        records = []
        if columns:
            rows = get_random_data_sample(
                db_name,
                sql.SQL(query),
                sample_size,
            )
            records = [
                {column: row[i] for i, column in enumerate(columns)}
                for row in rows
            ]

        ctx = super().get_context_data(**kwargs)
        ctx["fields"] = columns
        ctx["records"] = records
        ctx["preview_limit"] = sample_size
        ctx["record_count"] = len(records)
        # Plain int: a stray trailing comma previously made this the
        # one-element tuple (10,).
        ctx["fixed_table_height_limit"] = 10
        ctx["truncate_limit"] = 100
        return ctx
Exemple #4
0
 def _get_columns(self):
     """
     Look up the columns of this view's source table in the datasets db,
     passing include_types=True to the lookup
     """
     table = self.get_object()
     database_name = table.database.memorable_name
     columns = datasets_db.get_columns(
         database_name,
         schema=table.schema,
         table=table.table,
         include_types=True,
     )
     return columns
Exemple #5
0
 def get_preview_data(self, dataset):
     """Return the source table, its columns and the query used to preview it.

     The table is looked up by the ``table_uuid`` URL kwarg within the given
     dataset via ``get_object_or_404``. The result is a 3-tuple of
     ``(source table object, columns, preview query text)``.
     """
     source_table = get_object_or_404(
         self.model,
         id=self.kwargs.get('table_uuid'),
         dataset=dataset,
     )
     database = source_table.database.memorable_name
     table = source_table.table
     schema = source_table.schema
     columns = datasets_db.get_columns(database, schema=schema, table=table)
     # The whitespace inside the literal is part of the returned query text.
     preview_query = f"""
         select * from "{schema}"."{table}"
     """
     return source_table, columns, preview_query
Exemple #6
0
    def get_preview_data(self, dataset):
        """Return the custom query, its columns and the SQL used to preview it.

        The query is looked up by the ``query_id`` URL kwarg within the given
        dataset via ``get_object_or_404``. Raises ``PermissionDenied`` when
        the query is not reviewed and the requesting user is not a superuser.
        The result is a 3-tuple of ``(query object, columns, query text)``.
        """
        custom_query = get_object_or_404(
            self.model,
            id=self.kwargs.get('query_id'),
            dataset=dataset,
        )

        # Only superusers may preview a query that has not been reviewed.
        if not (custom_query.reviewed or self.request.user.is_superuser):
            raise PermissionDenied()

        database = custom_query.database.memorable_name
        fields = datasets_db.get_columns(database, query=custom_query.query)
        return custom_query, fields, custom_query.query
Exemple #7
0
    def get(self, request, *args, **kwargs):
        """Render a preview of a custom query belonging to a dataset.

        Responds with ``HttpResponseForbidden`` when the user has no access
        to the dataset, or when the query is not reviewed and the user is not
        a superuser. Otherwise renders ``datasets/data_cut_preview.html``
        with the query's columns and a random sample of its rows, each row
        given as a dict keyed by column.
        """
        dataset = find_dataset(self.kwargs.get('dataset_uuid'), request.user)
        if not dataset.user_has_access(self.request.user):
            return HttpResponseForbidden()

        query = get_object_or_404(
            self.model,
            id=self.kwargs.get('query_id'),
            dataset=dataset,
        )
        # Only superusers may preview a query that has not been reviewed.
        if not (query.reviewed or request.user.is_superuser):
            return HttpResponseForbidden()

        database = query.database.memorable_name
        columns = datasets_db.get_columns(database, query=query.query)
        sample_size = settings.DATACUT_DATASET_PREVIEW_NUM_OF_ROWS

        if columns:
            sampled_rows = get_random_data_sample(
                database, sql.SQL(query.query), sample_size
            )
            records = [
                {column: row[position] for position, column in enumerate(columns)}
                for row in sampled_rows
            ]
        else:
            records = []

        context = {
            'dataset': dataset,
            'query': query,
            'fields': columns,
            'records': records,
            'preview_limit': sample_size,
            'record_count': len(records),
            'fixed_table_height_limit': 10,
            'truncate_limit': 100,
        }
        return render(request, 'datasets/data_cut_preview.html', context)
Exemple #8
0
    def get(self, request, chart_id):
        """Return the state of a chart's query log as JSON.

        Only charts created by the requesting user are considered; when none
        matches, a 404 JSON response with a FAILED state is returned. The
        ``columns`` entry is filled from ``datasets_db.get_columns`` only
        once the query log state is COMPLETE, and is an empty list otherwise.
        """
        try:
            chart = ChartBuilderChart.objects.get(
                created_by=request.user, pk=chart_id
            )
        except ChartBuilderChart.DoesNotExist:
            not_found = {
                "state": QueryLogState.FAILED,
                "error": "Query does not exist",
            }
            return JsonResponse(not_found, status=404)

        query_log = chart.query_log
        if query_log.state == QueryLogState.COMPLETE:
            columns = datasets_db.get_columns(
                query_log.connection, query=str(query_log.sql)
            )
        else:
            columns = []

        payload = {
            "state": query_log.state,
            "error": query_log.error,
            "columns": columns,
        }
        return JsonResponse(payload)
Exemple #9
0
    def get_context_data(self, **kwargs):
        """Build the template context for the dataset detail page.

        Three shapes of context are produced, depending on the object:

        * reference dataset: ``preview_limit``, ``record_count`` and the
          records sliced to the preview limit;
        * visualisation: ``has_access`` and ``visualisation_link``;
        * anything else: ``has_access``, the data links (each paired with
          whether its link can be shown to the user), the column names of
          the first source kind present (tables, then views, then custom
          queries; ``None`` if there is none), whether any source link is
          not an ``s3://`` URL, and the code snippets for the object.
        """
        # Forward **kwargs so context supplied by the caller is not dropped.
        ctx = super().get_context_data(**kwargs)
        ctx['model'] = self.object

        if self._is_reference_dataset():
            records = self.object.get_records()
            total_record_count = records.count()
            preview_limit = self.get_preview_limit(total_record_count)

            ctx.update({
                'preview_limit': preview_limit,
                'record_count': total_record_count,
                'records': records[:preview_limit],
            })
            return ctx

        if self._is_visualisation():
            ctx.update({
                'has_access':
                self.object.user_has_access(self.request.user),
                "visualisation_link":
                self.object.get_visualisation_link(self.request),
            })
            return ctx

        source_tables = sorted(self.object.sourcetable_set.all(),
                               key=lambda x: x.name)
        source_views = self.object.sourceview_set.all()
        custom_queries = self.object.customdatasetquery_set.all()
        # Evaluated once and reused for both the data links and the
        # externally-hosted check below.
        source_links = list(self.object.sourcelink_set.all())

        if source_tables:
            # Prefix each column with its table so names stay
            # distinguishable across several tables.
            columns = []
            for table in source_tables:
                columns += [
                    "{}.{}".format(table.table, column)
                    for column in datasets_db.get_columns(
                        table.database.memorable_name,
                        schema=table.schema,
                        table=table.table,
                    )
                ]
        elif source_views:
            first_view = source_views[0]
            columns = datasets_db.get_columns(
                first_view.database.memorable_name,
                schema=first_view.schema,
                table=first_view.view,
            )
        elif custom_queries:
            first_query = custom_queries[0]
            columns = datasets_db.get_columns(
                first_query.database.memorable_name,
                query=first_query.query)
        else:
            columns = None

        data_links = sorted(
            chain(
                source_links,
                source_tables,
                source_views,
                custom_queries,
            ),
            key=lambda x: x.name,
        )

        DataLinkWithLinkToggle = namedtuple('DataLinkWithLinkToggle',
                                            ('data_link', 'can_show_link'))
        data_links_with_link_toggle = [
            DataLinkWithLinkToggle(
                data_link=data_link,
                can_show_link=data_link.can_show_link_for_user(
                    self.request.user),
            ) for data_link in data_links
        ]

        ctx.update({
            'has_access':
            self.object.user_has_access(self.request.user),
            'data_links_with_link_toggle':
            data_links_with_link_toggle,
            'fields':
            columns,
            'data_hosted_externally':
            any(not source_link.url.startswith('s3://')
                for source_link in source_links),
            'code_snippets':
            get_code_snippets(self.object),
        })
        return ctx
Exemple #10
0
    def get_context_data(self, **kwargs):
        """Build the template context for the dataset detail page.

        Three shapes of context are produced, depending on the object:

        * reference dataset: ``preview_limit``, ``record_count`` and the
          records sliced to the preview limit;
        * visualisation: ``has_access`` and ``visualisation_links``;
        * anything else: ``has_access``, the data links (each paired with
          whether its link can be shown to the user and its code snippets),
          the column names of the first source kind present (tables, then
          views, then custom queries; ``None`` if there is none), whether
          any source link is not an ``s3://`` URL, the dashboard URL for the
          ``quicksight_dashboard_id`` GET parameter (``None`` when absent),
          the data link type values, and the set of published datasets
          owning a source table referenced by one of the custom queries.
        """
        # Forward **kwargs so context supplied by the caller is not dropped.
        ctx = super().get_context_data(**kwargs)
        ctx['model'] = self.object

        if self._is_reference_dataset():
            records = self.object.get_records()
            total_record_count = records.count()
            preview_limit = self.get_preview_limit(total_record_count)

            ctx.update({
                'preview_limit': preview_limit,
                'record_count': total_record_count,
                'records': records[:preview_limit],
            })
            return ctx

        if self._is_visualisation():
            ctx.update({
                'has_access':
                self.object.user_has_access(self.request.user),
                "visualisation_links":
                self.object.get_visualisation_links(self.request),
            })
            return ctx

        source_tables = sorted(self.object.sourcetable_set.all(),
                               key=lambda x: x.name)
        source_views = self.object.sourceview_set.all()
        custom_queries = self.object.customdatasetquery_set.all(
        ).prefetch_related('tables')
        # Evaluated once and reused for both the data links and the
        # externally-hosted check below.
        source_links = list(self.object.sourcelink_set.all())

        if source_tables:
            # Prefix each column with its table so names stay
            # distinguishable across several tables.
            columns = []
            for table in source_tables:
                columns += [
                    "{}.{}".format(table.table, column)
                    for column in datasets_db.get_columns(
                        table.database.memorable_name,
                        schema=table.schema,
                        table=table.table,
                    )
                ]
        elif source_views:
            first_view = source_views[0]
            columns = datasets_db.get_columns(
                first_view.database.memorable_name,
                schema=first_view.schema,
                table=first_view.view,
            )
        elif custom_queries:
            first_query = custom_queries[0]
            columns = datasets_db.get_columns(
                first_query.database.memorable_name,
                query=first_query.query)
        else:
            columns = None

        data_links = sorted(
            chain(
                source_links,
                source_tables,
                source_views,
                custom_queries,
            ),
            key=lambda x: x.name,
        )

        DataLinkWithLinkToggle = namedtuple(
            'DataLinkWithLinkToggle',
            ('data_link', 'can_show_link', 'code_snippets'))
        data_links_with_link_toggle = [
            DataLinkWithLinkToggle(
                data_link=data_link,
                can_show_link=data_link.can_show_link_for_user(
                    self.request.user),
                code_snippets=get_code_snippets(data_link),
            ) for data_link in data_links
        ]

        # The dashboard URL is only resolved when an id is supplied in the
        # query string.
        dashboard_url = None
        quicksight_dashboard_id = self.request.GET.get(
            "quicksight_dashboard_id", None)
        if quicksight_dashboard_id:
            _, dashboard_url = get_quicksight_dashboard_name_url(
                quicksight_dashboard_id, self.request.user)

        query_tables = []
        for query in custom_queries:
            query_tables.extend(qt.table for qt in query.tables.all())

        ds_tables = SourceTable.objects.filter(
            dataset__published=True,
            table__in=query_tables,
        ).prefetch_related('dataset')
        related_masters = {ds_table.dataset for ds_table in ds_tables}

        ctx.update({
            'has_access':
            self.object.user_has_access(self.request.user),
            'data_links_with_link_toggle':
            data_links_with_link_toggle,
            'fields':
            columns,
            'data_hosted_externally':
            any(not source_link.url.startswith('s3://')
                for source_link in source_links),
            'visualisation_src':
            dashboard_url,
            'custom_dataset_query_type':
            DataLinkType.CUSTOM_QUERY.value,
            'source_table_type':
            DataLinkType.SOURCE_TABLE.value,
            'related_masters':
            related_masters,
        })
        return ctx