Example #1
0
    def process(self,
                document_type_config_csv_file: Dict,
                action: str,
                update_cache: bool,
                **kwargs):
        """Import a document type from a cached config dump and refresh caches.

        :param document_type_config_csv_file: dict holding at least a
            ``cache_key`` referencing the uploaded dump bytes in ``DbCache``.
        :param action: one of ``validate``, ``validate|import``,
            ``import|auto_fix|retain_missing_objects``,
            ``import|auto_fix|remove_missing_objects``.
        :param update_cache: when True (and the import actually saved),
            re-cache field values for all documents of the imported type.
        :raises RuntimeError: on an unknown ``action`` value.
        """
        # Each action maps onto the flag triple:
        # (save, auto_fix_validation_errors, remove_missed_objects).
        action_flags = {
            'validate': (False, False, False),
            'validate|import': (True, False, False),
            'import|auto_fix|retain_missing_objects': (True, True, False),
            'import|auto_fix|remove_missing_objects': (True, True, True),
        }
        try:
            save, auto_fix_validation_errors, remove_missed_objects = \
                action_flags[action]
        except KeyError:
            raise RuntimeError('Unknown action')

        try:
            json_bytes = DbCache.get(document_type_config_csv_file['cache_key'])
            document_type = import_document_type(json_bytes=json_bytes,
                                                 save=save,
                                                 auto_fix_validation_errors=auto_fix_validation_errors,
                                                 remove_missed_in_dump_objects=remove_missed_objects,
                                                 task=self)
        finally:
            # The uploaded dump is one-shot: always drop it from the DB cache,
            # even if the import itself failed.
            DbCache.clean_cache(document_type_config_csv_file['cache_key'])

        # Nothing was persisted, or the caller did not request a cache refresh.
        if not (save and update_cache):
            return

        from apps.rawdb.app_vars import APP_VAR_DISABLE_RAW_DB_CACHING
        if not APP_VAR_DISABLE_RAW_DB_CACHING.val:
            self.log_info('Adapting RawDB table structure after import ...')
            adapt_table_structure(CeleryTaskLogger(self), document_type, force=False)

        ids = list(Document.all_objects
                   .filter(document_type=document_type)
                   .values_list('pk', flat=True))
        self.log_info('Caching document field values ...')

        # Fan out in chunks so one huge document set does not block a worker.
        for chunk in chunks(ids, 50):
            self.run_sub_tasks('Cache field values for a set of documents',
                               ImportDocumentType.cache_document_fields_for_doc_ids,
                               [(list(chunk),)])
Example #2
0
 def process(self,
             document_field: Dict,
             config_csv_file: Dict,
             drop_previous_field_detectors: bool,
             update_field_choice_values: bool,
             csv_contains_regexps: bool,
             **kwargs):
     """Apply a simple field-detection configuration from a cached CSV dump.

     Loads the target :class:`DocumentField` by pk, reads the uploaded CSV
     bytes from the DB cache and feeds them to ``apply_simple_config``.
     The cached CSV is always removed afterwards, even on failure.
     """
     cache_key = config_csv_file['cache_key']
     try:
         self.log_info('Going to configure simple field detection config...')
         field = DocumentField.objects.get(pk=document_field['pk'])
         csv_bytes = DbCache.get(cache_key)
         apply_simple_config(CeleryTaskLogger(self),
                             field,
                             csv_bytes,
                             drop_previous_field_detectors,
                             update_field_choice_values,
                             csv_contains_regexps=csv_contains_regexps)
     finally:
         # One-shot upload: drop it from the DB cache no matter what.
         DbCache.clean_cache(cache_key)
Example #3
0
def download_task_attached_file(
        document_import_file: Dict[str, Any]) -> Generator[str, None, None]:
    """Yield a local filesystem path for the task's attached import file.

    Generator-based context: the yielded path is valid only until the
    generator is resumed/closed; afterwards cached data and any temp file
    are cleaned up.

    :param document_import_file: dict describing the file — either
        ``cache_key`` + ``file_name`` (DB-cached upload) or ``file_ref_id``
        (file storage reference). Assumed schema; confirm against callers.
    """
    if 'cache_key' in document_import_file:
        # Download from DB cache into a local temp file.
        zip_bytes = DbCache.get(document_import_file['cache_key'])
        ext = os.path.splitext(
            document_import_file['file_name'])[1][1:].lower()
        # Re-add the dot stripped above so the temp file keeps a usable
        # extension (mkstemp appends the suffix verbatim).
        fd, fn = tempfile.mkstemp(suffix='.' + ext if ext else '')
        try:
            # Write through the fd mkstemp already opened instead of leaking
            # it and re-opening the path by name.
            with os.fdopen(fd, 'wb') as fw:
                fw.write(zip_bytes)
            # Yield only after the write handle is closed so consumers can
            # safely re-open the file on any platform.
            yield fn
        finally:
            DbCache.clean_cache(document_import_file['cache_key'])
            # The temp file is ours — remove it so it does not pile up.
            try:
                os.remove(fn)
            except OSError:
                pass
    else:
        # Download from the file storage cache.
        file_ref_id = document_import_file['file_ref_id']
        file_ref = ExportFile.objects.get(pk=file_ref_id)  # type: ExportFile
        storage = get_file_storage()
        with storage.get_as_local_fn(file_ref.file_path) as f_path:
            yield f_path[0]