Example no. 1
0
    def get(self, request, project_id: int, *_args, **_kwargs):
        """Return document grid data for a single project.

        Looks up the project by primary key, queries documents of the
        project's document type (optionally narrowed by saved filters),
        and serializes counts/results to JSON with the elapsed time.
        """
        started_at = time.time()
        try:
            project = Project.objects.filter(pk=project_id).select_related('type').first()
            if project is None:
                return Response({'error': 'Project not found'}, status=404)

            saved_filter_ids = as_int_list(request.GET, 'saved_filters')  # type: List[int]

            results = query_documents(
                requester=request.user,
                document_type=project.type,
                project_ids=[project.pk],
                saved_filter_ids=saved_filter_ids,
                return_reviewed_count=True,
                return_documents=False,
                return_total_count=True,
                include_annotation_fields=True)  # type: DocumentQueryResults

            if not results:
                return Response({'time': time.time() - started_at})
            return _query_results_to_json(results, time.time() - started_at)
        except APIRequestError as e:
            return e.to_response()
        except Exception as e:
            return APIRequestError(message='Unable to process request',
                                   caused_by=e,
                                   http_status_code=500).to_response()
Example no. 2
0
    def __init__(self, *args, **kwargs):
        """
        Automatically populates the selection choices based on:
        - HighQConfiguration DocumentType
        - HighQConfiguration iSheet ID

        Keyword Args:
            parent: optional object carrying ``contraxsuite_documenttype_id``,
                ``isheet_id`` and a ``highqconfiguration_set`` relation;
                popped from ``kwargs`` before delegating to the base form.
        """
        parent = kwargs.pop('parent', None)
        super(HighQiSheetColumnAssociationForm, self).__init__(*args, **kwargs)
        if parent is not None:
            if parent.contraxsuite_documenttype_id:
                document_type: DocumentType = \
                    DocumentType.objects.get(uid=parent.contraxsuite_documenttype_id)

                # One '<field_code>_ann' pseudo-column per field of the type.
                document_field_annotation_codes: Generator[str, None, None] = (
                    f'{document_field}_ann'
                    for document_field in DocumentField.objects.filter(
                        document_type=document_type).values_list('code',
                                                                 flat=True))

                # Variable-length tuple: annotate as Tuple[str, ...], not Tuple[str].
                column_codes: Tuple[str, ...] = tuple(
                    query_documents(document_type=document_type).column_codes)

                choices_field_code: Tuple[Tuple[str, str], ...] = tuple(
                    (field_code, field_code)
                    for field_code in sorted((*document_field_annotation_codes,
                                              *column_codes)))

                self.fields['contraxsuite_field_code'].widget = \
                    Select(choices=choices_field_code)

            if parent.isheet_id:
                try:
                    highq_configuration: HighQConfiguration = \
                        parent.highqconfiguration_set.first()

                    highq_api_client: HighQ_API_Client = \
                        HighQ_API_Client(highq_configuration=highq_configuration)

                    isheet_columns: \
                        Tuple[Tuple[str, int, Optional[List[Tuple[str, int]]]], ...] = \
                        tuple(highq_api_client.fetch_column_ids_names_choices(
                            isheetid=parent.isheet_id
                        ))

                    # Label each column as '<name> {<id>}', keyed by its id.
                    choices_column_id: Tuple[Tuple[int, str], ...] = tuple(
                        (column[1], f'{column[0]} {{{column[1]}}}')
                        for column in isheet_columns)

                    self.fields['highq_isheet_column_id'].widget = \
                        Select(choices=choices_column_id)
                except Exception:
                    # Best-effort: keep the default widget if the HighQ API is
                    # unavailable.  Narrowed from a bare ``except:``, which
                    # would also have swallowed SystemExit/KeyboardInterrupt.
                    pass
 def prepare_documents(
         self, document_type: DocumentType, user: User,
         field_codes: List[str], period_start: datetime.datetime,
         period_end: datetime.datetime) -> DocumentQueryResults:
     """Query up to 1000 documents created within the given period.

     Documents of ``document_type`` whose create-date field lies in
     [period_start, period_end], ordered by assign date ascending, with
     total/reviewed counts disabled.
     """
     created_in_period = SQLClause(
         '{f} >= %s and {f} <= %s'.format(f=FIELD_CODE_CREATE_DATE),
         [period_start, period_end])
     return query_documents(requester=user,
                            document_type=document_type,
                            field_codes=field_codes,
                            order_by=[(FIELD_CODE_ASSIGN_DATE, SortDirection.ASC)],
                            filters_sql=created_in_period,
                            return_total_count=False,
                            return_reviewed_count=False,
                            limit=1000)
Example no. 4
0
 def prepare_documents(self,
                       document_type: DocumentType,
                       user: User,
                       field_codes: List[str],
                       period_start: datetime.datetime,
                       period_end: datetime.datetime) -> DocumentQueryResults:
     """Query up to 1000 not-yet-completed documents assigned to ``user``.

     Note: ``period_start``/``period_end`` are accepted for interface
     compatibility but do not participate in the filter — only the
     assignee id and the is-completed flag are used.  Results are ordered
     by assign date ascending; the query runs without a requester.
     """
     assigned_incomplete = SQLClause(
         '{assignee_id} = %s and {f} = False'.format(
             assignee_id=FIELD_CODE_ASSIGNEE_ID,
             f=FIELD_CODE_IS_COMPLETED),
         [user.pk])
     return query_documents(requester=None,
                            document_type=document_type,
                            field_codes=field_codes,
                            order_by=[(FIELD_CODE_ASSIGN_DATE, SortDirection.ASC)],
                            filters_sql=assigned_incomplete,
                            return_total_count=False,
                            return_reviewed_count=False,
                            limit=1000)
Example no. 5
0
    def get(self, request, document_type_code: str, *_args, **_kwargs):
        """Document grid endpoint: filter/sort/page/export documents of a type.

        Reads grid options from query params (projects, columns, column
        filters, ordering, paging, output format), optionally persists the
        grid configuration as a per-project SavedFilter, and returns the
        query results as JSON, CSV or XLSX.
        """
        start = time.time()
        try:
            document_type = DocumentType.objects.get(code=document_type_code)

            project_ids = as_int_list(request.GET, 'project_ids')  # type: List[int]

            columns = as_str_list(request.GET, 'columns')

            include_annotations = as_bool(request.GET, 'associated_text')
            if include_annotations:
                all_annotation_columns = get_annotation_columns(document_type)
                # Add the annotation companion column for every requested
                # column.  NB: str.rstrip() strips a *character set*, not a
                # suffix (e.g. 'plan_ann'.rstrip('_ann') == 'pl'), so the
                # literal suffix must be removed explicitly instead.
                suffix_len = len(FIELD_CODE_ANNOTATION_SUFFIX)
                columns += [i.field_code for i in all_annotation_columns
                            if i.field_code.endswith(FIELD_CODE_ANNOTATION_SUFFIX)
                            and i.field_code[:-suffix_len] in columns]

            fmt = request.GET.get('fmt') or self.FMT_JSON

            # Negative offset / non-positive limit are treated as "not set".
            offset = as_int(request.GET, 'offset', None)
            if offset is not None and offset < 0:
                offset = None

            limit = as_int(request.GET, 'limit', None)
            if limit is not None and limit <= 0:
                limit = None

            # For json output we limit the number of returned documents because
            # we don't use a streaming response for JSON and want to keep it fast.
            if fmt == self.FMT_JSON and self.MAX_RETURNED_DOCUMENTS_JSON is not None \
                    and (limit is None or limit > self.MAX_RETURNED_DOCUMENTS_JSON):
                limit = self.MAX_RETURNED_DOCUMENTS_JSON

            saved_filters = as_int_list(request.GET, 'saved_filters')  # type: List[int]

            # Column filters arrive as query params prefixed with
            # URL_PARAM_PREFIX_FILTER; the prefix is stripped off here.
            column_filters = list()
            for param, value in request.GET.items():  # type: str, str
                if param.startswith(self.URL_PARAM_PREFIX_FILTER):
                    column_filters.append((param[len(self.URL_PARAM_PREFIX_FILTER):], value))

            order_by = request.GET.get('order_by') or None  # type: str
            order_by = parse_order_by(order_by) if order_by else None

            save_filter = as_bool(request.GET, 'save_filter', False)  # type: bool

            return_reviewed = as_bool(request.GET, 'return_reviewed', False)
            return_total = as_bool(request.GET, 'return_total', True)
            return_data = as_bool(request.GET, 'return_data', True)
            ignore_errors = as_bool(request.GET, 'ignore_errors', True)

            if project_ids and save_filter:
                # Persist the current grid config, keeping only the newest
                # SavedFilter per (user, document_type, project).
                column_filters_dict = {c: f for c, f in column_filters}
                for project_id in project_ids:
                    with transaction.atomic():
                        obj = SavedFilter.objects.create(user=request.user,
                                                         document_type=document_type,
                                                         filter_type=FT_USER_DOC_GRID_CONFIG,
                                                         project_id=project_id,
                                                         columns=columns,
                                                         column_filters=column_filters_dict,
                                                         title=None,
                                                         order_by=[(column, direction.value)
                                                                   for column, direction in order_by]
                                                         if order_by else None)
                        SavedFilter.objects.filter(user=request.user,
                                                   document_type=document_type,
                                                   filter_type=FT_USER_DOC_GRID_CONFIG,
                                                   project_id=project_id) \
                            .exclude(pk=obj.pk) \
                            .delete()

            query_results = query_documents(requester=request.user,
                                            document_type=document_type,
                                            project_ids=project_ids,
                                            column_names=columns,
                                            saved_filter_ids=saved_filters,
                                            column_filters=column_filters,
                                            order_by=order_by,
                                            offset=offset,
                                            limit=limit,
                                            return_documents=return_data,
                                            return_reviewed_count=return_reviewed,
                                            return_total_count=return_total,
                                            ignore_errors=ignore_errors,
                                            include_annotation_fields=True)  # type: DocumentQueryResults

            if fmt in {self.FMT_XLSX, self.FMT_CSV} and not return_data:
                raise APIRequestError('Export to csv/xlsx requested with return_data=false')

            if fmt == self.FMT_CSV:
                return _query_results_to_csv(query_results)
            elif fmt == self.FMT_XLSX:
                return _query_results_to_xlsx(query_results)
            else:
                if query_results is None:
                    return Response({'time': time.time() - start})
                return _query_results_to_json(query_results, time.time() - start)
        except APIRequestError as e:
            return e.to_response()
        except Exception as e:
            return APIRequestError(message='Unable to process request', caused_by=e, http_status_code=500).to_response()
Example no. 6
0
    def get(self, request, document_type_code: str, *_args, **_kwargs):
        """Document grid endpoint: filter/sort/page documents of a type.

        Reads grid options from query params, optionally upserts the grid
        configuration per project (``save_filter``), runs the document
        query and returns either size-capped JSON or a streaming CSV
        export, depending on ``fmt``.
        """
        start = time.time()
        try:
            document_type = DocumentType.objects.get(code=document_type_code)

            project_ids = as_int_list(request.GET,
                                      'project_ids')  # type: List[int]

            columns = as_str_list(request.GET, 'columns')

            fmt = request.GET.get('fmt') or self.FMT_JSON

            # Negative offsets and non-positive limits are treated as unset.
            offset = as_int(request.GET, 'offset', None)
            if offset is not None and offset < 0:
                offset = None

            limit = as_int(request.GET, 'limit', None)
            if limit is not None and limit <= 0:
                limit = None

            # For json output we limit number of returned documents because we dont use streaming response for JSON
            # and want to keep it fast.
            # NOTE(review): unlike the sibling views, MAX_RETURNED_DOCUMENTS_JSON
            # is not checked for None here — presumably it is always set on this
            # class; confirm before reuse.
            if fmt == self.FMT_JSON and (
                    limit is None or limit > self.MAX_RETURNED_DOCUMENTS_JSON):
                limit = self.MAX_RETURNED_DOCUMENTS_JSON

            saved_filters = as_int_list(request.GET,
                                        'saved_filters')  # type: List[int]

            # Column filters arrive as query params prefixed with
            # URL_PARAM_PREFIX_FILTER; the prefix is stripped off here.
            column_filters = list()
            for param, value in request.GET.items():  # type: str, str
                if param.startswith(self.URL_PARAM_PREFIX_FILTER):
                    column_filters.append(
                        (param[len(self.URL_PARAM_PREFIX_FILTER):], value))

            order_by = request.GET.get('order_by') or None  # type: str
            order_by = parse_order_by(order_by) if order_by else None

            save_filter = as_bool(request.GET, 'save_filter',
                                  False)  # type: bool

            return_reviewed = as_bool(request.GET, 'return_reviewed', False)
            return_total = as_bool(request.GET, 'return_total', True)
            return_data = as_bool(request.GET, 'return_data', True)
            ignore_errors = as_bool(request.GET, 'ignore_errors', True)

            if project_ids and save_filter:
                # Upsert exactly one grid-config SavedFilter per
                # (user, document_type, project).  The lookup fields are
                # repeated in ``defaults`` so a newly created row also gets
                # them populated.
                column_filters_dict = {c: f for c, f in column_filters}
                for project_id in project_ids:
                    SavedFilter.objects.update_or_create(
                        user=request.user,
                        document_type=document_type,
                        filter_type=FT_USER_DOC_GRID_CONFIG,
                        project_id=project_id,
                        defaults={
                            'user':
                            request.user,
                            'document_type':
                            document_type,
                            'filter_type':
                            FT_USER_DOC_GRID_CONFIG,
                            'project_id':
                            project_id,
                            'columns':
                            columns,
                            'column_filters':
                            column_filters_dict,
                            'title':
                            None,
                            'order_by': [(column, direction.value)
                                         for column, direction in order_by]
                            if order_by else None
                        })
            query_results = query_documents(
                requester=request.user,
                document_type=document_type,
                project_ids=project_ids,
                column_names=columns,
                saved_filter_ids=saved_filters,
                column_filters=column_filters,
                order_by=order_by,
                offset=offset,
                limit=limit,
                return_documents=return_data,
                return_reviewed_count=return_reviewed,
                return_total_count=return_total,
                ignore_errors=ignore_errors)  # type: DocumentQueryResults

            if fmt.lower() == 'csv':
                if not return_data:
                    raise APIRequestError(
                        'Export to csv requested with return_data=false')
                else:
                    # CSV is streamed row-by-row, so no document limit applies.
                    resp = StreamingHttpResponse(csv_gen(
                        query_results.column_codes, query_results.fetch(),
                        query_results.column_titles),
                                                 content_type='text/csv')
                    resp[
                        'Content-Disposition'] = 'attachment; filename="export.csv"'
                    return resp
            else:
                if query_results is None:
                    return Response({'time': time.time() - start})

                # As we limit the number of returned documents for JSON we can keep response in non-streaming form.
                return Response(
                    _query_results_to_json(query_results,
                                           time.time() - start))

                # Switch to StreamingHttpResponse if/when we really need to return very big json output.
                # _query_results_to_json() returns dict with document items backed with a generator.
                # But on local tests for small number of documents the streaming json output works two times
                # slower than non-streaming response. CSV works the same fast.
                # return StreamingHttpResponse(json_gen(_query_results_to_json(query_results, time.time() - start)),
                #       content_type='application/json')
        except APIRequestError as e:
            return e.to_response()
        except Exception as e:
            return APIRequestError(message='Unable to process request',
                                   caused_by=e,
                                   http_status_code=500).to_response()
Example no. 7
0
    def get(self, request, document_type_code: str, *_args, **_kwargs):
        """Document grid endpoint with assignee statistics.

        Reads grid options from query params, optionally saves the grid
        configuration per project, runs the document query plus a second
        query for per-assignee document counts, and returns JSON, CSV or
        XLSX (optionally zipped).
        """
        start = time.time()
        try:
            document_type = DocumentType.objects.get(code=document_type_code)

            project_ids = as_int_list(request.GET,
                                      'project_ids')  # type: List[int]

            columns = as_str_list(request.GET, 'columns')

            include_annotations = as_bool(request.GET, 'associated_text')
            if include_annotations:
                all_annotation_columns = get_annotation_columns(document_type)
                # Add the annotation companion column for every requested
                # column.  NB: str.rstrip() strips a *character set*, not a
                # suffix (e.g. 'plan_ann'.rstrip('_ann') == 'pl'), so the
                # literal suffix must be removed explicitly instead.
                suffix_len = len(FIELD_CODE_ANNOTATION_SUFFIX)
                columns += [
                    i.field_code for i in all_annotation_columns
                    if i.field_code.endswith(FIELD_CODE_ANNOTATION_SUFFIX)
                    and i.field_code[:-suffix_len] in columns
                ]

            fmt = request.GET.get('fmt') or self.FMT_JSON
            as_zip = request.GET.get('as_zip') == 'true'

            # Negative offset / non-positive limit are treated as "not set".
            offset = as_int(request.GET, 'offset', None)
            if offset is not None and offset < 0:
                offset = None

            limit = as_int(request.GET, 'limit', None)
            if limit is not None and limit <= 0:
                limit = None

            # For json output we limit the number of returned documents because
            # we don't use a streaming response for JSON and want to keep it fast.
            if fmt == self.FMT_JSON and self.MAX_RETURNED_DOCUMENTS_JSON is not None \
                    and (limit is None or limit > self.MAX_RETURNED_DOCUMENTS_JSON):
                limit = self.MAX_RETURNED_DOCUMENTS_JSON

            saved_filters = as_int_list(request.GET,
                                        'saved_filters')  # type: List[int]

            # Column filters arrive as query params prefixed with
            # URL_PARAM_PREFIX_FILTER; the prefix is stripped off here.
            column_filters = list()
            for param, value in request.GET.items():  # type: str, str
                if param.startswith(self.URL_PARAM_PREFIX_FILTER):
                    column_filters.append(
                        (param[len(self.URL_PARAM_PREFIX_FILTER):], value))

            # in case if filter params are passed like &filters=a=b&c=d
            # NOTE(review): ast.literal_eval on a raw query param assumes the
            # client sends a Python-literal dict; it raises on malformed input
            # (caught by the broad handler below) — confirm the client contract.
            filter_query_string = request.GET.get('filters')
            if filter_query_string:
                for param, value in ast.literal_eval(
                        filter_query_string).items():  # type: str, str
                    if param.startswith(self.URL_PARAM_PREFIX_FILTER):
                        column_filters.append(
                            (param[len(self.URL_PARAM_PREFIX_FILTER):], value))

            order_by = request.GET.get('order_by') or None  # type: str
            order_by = parse_order_by(order_by) if order_by else None

            save_filter = as_bool(request.GET, 'save_filter',
                                  False)  # type: bool

            return_reviewed = as_bool(request.GET, 'return_reviewed', False)
            return_total = as_bool(request.GET, 'return_total', True)
            return_data = as_bool(request.GET, 'return_data', True)
            ignore_errors = as_bool(request.GET, 'ignore_errors', True)

            if project_ids and save_filter:
                # Persist the current grid config, keeping only the newest
                # SavedFilter per (user, document_type, project).
                column_filters_dict = {c: f for c, f in column_filters}
                for project_id in project_ids:
                    with transaction.atomic():
                        obj = SavedFilter.objects.create(
                            user=request.user,
                            document_type=document_type,
                            filter_type=FT_USER_DOC_GRID_CONFIG,
                            project_id=project_id,
                            columns=columns,
                            column_filters=column_filters_dict,
                            title=None,
                            order_by=[(column, direction.value)
                                      for column, direction in order_by]
                            if order_by else None)
                        # Scope the duplicate cleanup to this document type,
                        # consistent with the sibling view; otherwise saving a
                        # grid config for one type would delete configs of all
                        # other types in the same project.
                        SavedFilter.objects.filter(user=request.user,
                                                   document_type=document_type,
                                                   filter_type=FT_USER_DOC_GRID_CONFIG,
                                                   project_id=project_id) \
                            .exclude(pk=obj.pk) \
                            .delete()

            # show_unprocessed = as_bool(request.GET, 'show_unprocessed', False)
            # if show_unprocessed is False:
            #     column_filters.append((FIELD_CODE_DOC_PROCESSED, 'true'))

            # Unfiltered total for the type (optionally narrowed to projects),
            # reported alongside the filtered results.
            total_documents_query = Document.objects.filter(
                document_type=document_type)
            if project_ids:
                total_documents_query = total_documents_query.filter(
                    project_id__in=project_ids)
            total_documents_of_type = total_documents_query.count()

            # Always query id/name so rows can be identified even when the
            # client did not request those columns.
            columns_to_query = columns
            if columns_to_query:
                columns_to_query = leave_unique_values(
                    ['document_id', 'document_name'] + columns)

            query_results = query_documents(
                requester=request.user,
                document_type=document_type,
                project_ids=project_ids,
                column_names=columns_to_query,  # columns,
                saved_filter_ids=saved_filters,
                column_filters=column_filters,
                order_by=order_by,
                offset=offset,
                limit=limit,
                return_documents=return_data,
                return_reviewed_count=return_reviewed,
                return_total_count=return_total,
                ignore_errors=ignore_errors,
                include_annotation_fields=True)  # type: DocumentQueryResults

            if query_results is None:
                if fmt in {self.FMT_XLSX, self.FMT_CSV} and not return_data:
                    raise APIRequestError('Empty data, nothing to export')
                return Response({'time': time.time() - start})

            # get assignees stats
            assignees_query_results = query_documents(
                requester=request.user,
                document_type=document_type,
                project_ids=project_ids,
                column_names=['document_id', 'assignee_name', 'assignee_id'],
                saved_filter_ids=saved_filters,
                column_filters=column_filters,
                return_documents=True,
                return_reviewed_count=False,
                include_annotation_fields=include_annotations
            )  # type: DocumentQueryResults

            # Group document ids and counts by assignee.
            query_results.assignees = []
            if assignees_query_results is not None:
                df = pd.DataFrame(assignees_query_results.fetch_dicts())
                if not df.empty:
                    df = df.groupby(['assignee_id', 'assignee_name'])\
                        .agg({'document_id': [('document_ids', lambda x: list(x)), ('documents_count', 'count')]})
                    if not df.empty:
                        df.columns = df.columns.droplevel()
                        df = df.reset_index()
                        df['assignee_id'] = df['assignee_id'].astype(int)
                        query_results.assignees = df.to_dict('records')

            query_results.unfiltered_count = total_documents_of_type

            if fmt in {self.FMT_XLSX, self.FMT_CSV} and not return_data:
                raise APIRequestError(
                    'Export to csv/xlsx requested with return_data=false')

            if fmt == self.FMT_CSV:
                return query_results.to_csv(as_zip=as_zip)
            elif fmt == self.FMT_XLSX:
                return query_results.to_xlsx(as_zip=as_zip)
            else:
                query_dict = query_results.to_json(time_start=start)
                # Drop the implicitly-added document_id from the payload if the
                # client did not ask for it.
                if columns and 'items' in query_dict:
                    columns_to_remove = []
                    if 'document_id' not in columns:
                        columns_to_remove.append('document_id')
                    query_dict['items'] = self.expand_items(
                        query_dict['items'], columns_to_remove)
                return Response(query_dict)
        except APIRequestError as e:
            return e.to_response()
        except Exception as e:
            return APIRequestError(message='Unable to process request',
                                   caused_by=e,
                                   http_status_code=500).to_response()