def get_document_loading_progress(self) -> Dict[str, Any]:
    from apps.task.tasks import LoadDocuments
    # from apps.project.tasks import LoadArchive
    load_tasks_total = Task.objects.filter(
        name__in=[LoadDocuments.name],
        upload_session_id=self.pk).count()

    docs = self.document_set.all()
    doc_type = self.project.type

    # documents may still be loading, so take the larger of the two counts
    docs_count = docs.count()
    docs_count = max(load_tasks_total, docs_count)
    doc_ids = list(docs.values_list('pk', flat=True))

    # count how many of the session's documents already have a row
    # in the per-document-type field values cache table
    docs_cached = 0
    if doc_ids:
        doc_ids_str = ','.join(str(doc_id) for doc_id in doc_ids)
        cache_table_name = doc_fields_table_name(doc_type.code)
        with connection.cursor() as cursor:
            cursor.execute(f'SELECT COUNT(*) FROM "{cache_table_name}" '
                           f'WHERE document_id IN ({doc_ids_str});')
            row = cursor.fetchone()
            docs_cached = row[0]

    # overall progress is the share of documents whose fields are already cached
    result = {'task_count': load_tasks_total,
              'docs_count': docs_count,
              'docs_cached': docs_cached}
    progress = 0

    if docs_count > 0:
        progress = round(100 * docs_cached / docs_count, 1)
    result['progress'] = progress
    return result
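# Usage sketch (assumption, not from the original source): the method above takes
# `self`, so it is presumed to live on the upload session model. A polling view or
# task could read the numbers like this; the helper name is hypothetical.
def _example_report_upload_progress(session) -> float:
    stats = session.get_document_loading_progress()
    # stats holds 'task_count', 'docs_count', 'docs_cached' and 'progress'
    return stats['progress']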
def get_non_indexed_doc_ids_not_planned_to_index_by_predicate(
        doc_type_code: str,
        predicate: str,
        pack_size: int = 100) -> Generator[List[int], None, None]:
    table_name = doc_fields_table_name(doc_type_code)

    with connection.cursor() as cursor:
        # return documents of the specified type which
        # - do not exist in the corresponding fields cache
        # - have no planned but not-started reindex tasks on them
        cursor.execute(
            'select dd.id \n'
            'from document_document dd \n'
            f'left outer join "{table_name}" df on dd.id = df.document_id \n'
            'left outer join lateral (select jsonb_array_elements(args->0) doc_id \n'
            '                         from task_task \n'
            f"                         where name = '{_get_reindex_task_name()}' \n"
            "                         and own_status = 'PENDING'\n"
            '                         and date_work_start is null) tt on tt.doc_id = to_jsonb(dd.id) \n'
            f'where {predicate} and df.document_id is null and tt.doc_id is null \n'
            'and dd.processed is true')

        rows = cursor.fetchmany(pack_size)
        while rows:
            yield [row[0] for row in rows]
            rows = cursor.fetchmany(pack_size)
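# Usage sketch (assumption): consume the generator pack by pack. The predicate is
# raw SQL over the document_document alias "dd"; the placeholder predicate below is
# illustrative only, not taken from the original code.
def _example_collect_non_indexed_ids(doc_type_code: str) -> List[int]:
    collected = []  # type: List[int]
    for pack in get_non_indexed_doc_ids_not_planned_to_index_by_predicate(
            doc_type_code,
            predicate='dd.document_type_id is not null',  # placeholder predicate
            pack_size=50):
        # each yielded pack is a list of at most pack_size document ids
        collected.extend(pack)
    return collected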
def do_rename(apps, schema_editor):
    DocumentField = apps.get_model('document', 'DocumentField')
    to_rename = list()  # type: List[Tuple[str, str, str]]
    for field in DocumentField.objects.filter(type='ratio'):
        table_name = doc_fields_table_name(field.document_type.code)
        handler = RatioRawdbFieldHandler(field.code, field.type, field.title,
                                         table_name)
        to_rename.append(
            (table_name,
             escape_column_name(handler.field_column_name_base + '_con'),
             escape_column_name(handler.field_column_name_base + '_den')))

    for table_name, column_from, column_to in to_rename:
        with connection.cursor() as cursor:
            cursor.execute(f'''
            DO $$
            BEGIN
                IF EXISTS(SELECT column_name 
                            FROM information_schema.columns 
                            WHERE table_name='{table_name}' and column_name='{column_from}')
                    AND NOT 
                    EXISTS(SELECT column_name 
                            FROM information_schema.columns 
                            WHERE table_name='{table_name}' and column_name='{column_to}')
                    THEN
                    ALTER TABLE {table_name} RENAME COLUMN {column_from} TO {column_to};
                END IF;
            END $$;
            ''')
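# Migration wiring sketch (assumption, not from the original file): do_rename has
# the (apps, schema_editor) signature of a RunPython callable, so it would be
# attached to a data migration along these lines. The dependency name is hypothetical.
from django.db import migrations


class Migration(migrations.Migration):

    dependencies = [
        ('document', '0xxx_previous_migration'),  # hypothetical dependency
    ]

    operations = [
        migrations.RunPython(do_rename, reverse_code=migrations.RunPython.noop),
    ]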
def do_migrate(apps, schema_editor):
    DocumentType = apps.get_model('document', 'DocumentType')
    with connection.cursor() as cursor:
        for document_type in DocumentType.objects.all():
            table_name = doc_fields_table_name(
                document_type_code=document_type.code)
            cursor.execute(f'DROP TABLE IF EXISTS "{table_name}"')
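# Alternative sketch (assumption, not the original project's approach): when the
# table name cannot be trusted, psycopg2's sql module can quote the identifier
# instead of interpolating it into the statement string.
from psycopg2 import sql


def drop_fields_table_safely(pg_cursor, table_name: str) -> None:
    # sql.Identifier() double-quotes and escapes the table name
    pg_cursor.execute(sql.SQL('DROP TABLE IF EXISTS {}').format(sql.Identifier(table_name)))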
def there_are_non_indexed_docs_not_planned_to_index(
        document_type: DocumentType,
        log: ProcessLogger) -> bool:
    for doc_id in non_indexed_doc_ids_not_planned_to_index_by_doc_type(document_type, 1):
        if doc_id:
            task_name = _get_reindex_task_name()
            fields_table = doc_fields_table_name(document_type.code)
            log.info(f'there_are_non_indexed_docs_not_planned_to_index: '
                     f'found document id={doc_id} of type {document_type.code}, '
                     f'task {task_name}. Fields table: {fields_table}')
            return True
    return False
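# Usage sketch (assumption): a periodic maintenance job could use the check above
# to decide whether a reindex needs to be planned for a document type.
# plan_reindex_for_type() is a hypothetical scheduling helper, not original code.
def _example_plan_reindex_if_needed(document_type: DocumentType,
                                    log: ProcessLogger) -> bool:
    if there_are_non_indexed_docs_not_planned_to_index(document_type, log):
        log.info(f'planning reindex for document type {document_type.code}')
        # plan_reindex_for_type(document_type)  # hypothetical call, left commented
        return True
    return False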