def make_records_untyped(_apps, _schema_editor):
    """Migration helper: replace typed DictionaryEntry records in each cache with plain tuples."""
    for cache_key in CACHE_KEYS:
        untyped = []
        records = DbCache.get(cache_key)
        for record in records:
            if record.__class__.__name__ != 'DictionaryEntry':
                untyped.append(record)
                continue
            try:
                aliases = [(a.alias, a.language, a.is_abbreviation, a.alias_id, a.normalized_alias)
                           for a in record.aliases]
                rec = (record.id, record.name, record.priority, aliases)
                untyped.append(rec)
            except Exception as e:
                print(f'Unable to cast a DictionaryEntry in "{cache_key}" to a tuple: {e}')
        DbCache.put_to_db(cache_key, untyped)
    if DbCache.INSTANCE:
        DbCache.INSTANCE.stop_watching()
def make_records_typed(_apps, _schema_editor):
    """Migration helper: restore plain-tuple cache records back to DictionaryEntry objects."""
    for cache_key in CACHE_KEYS:
        typed = []  # type: List[DictionaryEntry]
        records = DbCache.get(cache_key)
        for record in records:
            if record.__class__.__name__ == 'DictionaryEntry':
                typed.append(record)
                continue
            try:
                aliases = [DictionaryEntryAlias(alias, lang, is_abbr, alias_id, norm_als)
                           for alias, lang, is_abbr, alias_id, norm_als in record[3]]
                rec = DictionaryEntry(record[0], record[1], priority=record[2], aliases=aliases)
                typed.append(rec)
            except Exception as e:
                print(f'Unable to cast a record in "{cache_key}" to DictionaryEntry: {e}')
        DbCache.put_to_db(cache_key, typed)
    if DbCache.INSTANCE:
        DbCache.INSTANCE.stop_watching()
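# A standalone sketch of the round trip the two migration helpers above perform,
# using namedtuples as hypothetical stand-ins for DictionaryEntry and
# DictionaryEntryAlias (the real classes come from the dictionary-entities
# module). It illustrates why the alias tuples keep a fixed field order:
# make_records_typed() unpacks record[3] positionally.
from collections import namedtuple

Entry = namedtuple('Entry', 'id name priority aliases')
Alias = namedtuple('Alias', 'alias language is_abbreviation alias_id normalized_alias')

def to_untyped(e):
    aliases = [(a.alias, a.language, a.is_abbreviation, a.alias_id, a.normalized_alias)
               for a in e.aliases]
    return e.id, e.name, e.priority, aliases

def to_typed(rec):
    return Entry(rec[0], rec[1], rec[2], [Alias(*t) for t in rec[3]])

_entry = Entry(1, 'Supreme Court', 0, [Alias('SC', 'en', True, 10, ' sc ')])
assert to_typed(to_untyped(_entry)) == _entry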
def cache_court_config():
    res = [dict_entities.entity_config(entity_id=i.id,
                                       name=i.name,
                                       priority=0,
                                       aliases=i.alias.split(';') if i.alias else [])
           for i in Court.objects.all()]
    DbCache.put_to_db(CACHE_KEY_COURT_CONFIG, res)
def cache_term_stems():
    term_stems = {}
    for t, pk in Term.objects.values_list('term', 'pk'):
        # pad with spaces so stemmed terms can later be matched on word boundaries
        stemmed_term = ' %s ' % ' '.join(get_stems(t))
        stemmed_item = term_stems.get(stemmed_term, [])
        stemmed_item.append([t, pk])
        term_stems[stemmed_term] = stemmed_item
    for item in term_stems:
        term_stems[item] = dict(values=term_stems[item],
                                length=len(term_stems[item]))
    DbCache.put_to_db(CACHE_KEY_TERM_STEMS, term_stems)
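# A minimal standalone sketch of the index shape cache_term_stems() builds,
# with a trivial lowercase/split "stemmer" standing in for get_stems(); the
# real function reads the Term model and persists the result through DbCache.
# Each stemmed form maps to the original term/pk pairs sharing that stem plus
# a precomputed bucket size.
def build_stem_index(terms):
    # terms: iterable of (term_text, pk) pairs
    index = {}
    for text, pk in terms:
        stemmed = ' %s ' % ' '.join(text.lower().split())  # stand-in for get_stems()
        index.setdefault(stemmed, []).append([text, pk])
    return {stem: dict(values=items, length=len(items)) for stem, items in index.items()}

print(build_stem_index([('Force Majeure', 1), ('force majeure', 2)]))
# {' force majeure ': {'values': [['Force Majeure', 1], ['force majeure', 2]], 'length': 2}}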
def normalize(task_id, key, value):
    DB_CACHED_FILE_LIMIT = 1024 * 1024 * 100  # 100 MB
    try:
        json.dumps(value)
        return value
    except TypeError:
        if isinstance(value, models.Model):
            return SimpleObjectSerializer().serialize([value]).pop()
        elif isinstance(value, QuerySet):
            return SimpleObjectSerializer().serialize(value)
        elif isinstance(value, (dict, list, tuple, set)):
            return pre_serialize(task_id, key, value)
        elif isinstance(value, UploadedFile):
            uploaded_file = value  # type: UploadedFile
            if uploaded_file.size < DB_CACHED_FILE_LIMIT:
                # small files are cached directly in the DB
                cache_key = str(task_id) + '__' + str(key) if key else str(task_id)
                DbCache.put_to_db(cache_key, uploaded_file.read())
                return {'file_name': uploaded_file.name, 'cache_key': cache_key}
            else:
                # large files go to file storage, referenced by an ExportFile record
                file_ref = ExportFile()
                file_ref.created_time = datetime.datetime.utcnow()
                file_ref.expires_at = datetime.datetime.utcnow() + datetime.timedelta(hours=1)
                file_ref.comment = f'Import documents from "{uploaded_file.name}" file'
                time_part = str(datetime.datetime.utcnow()).replace('.', '_').replace(':', '_').replace(' ', '_')
                file_name = f'doc_export_{os.path.splitext(uploaded_file.name)[0]}_{time_part}.zip'
                storage = get_file_storage()
                docs_subfolder = storage.sub_path_join(storage.export_path, 'documents')
                try:
                    storage.mkdir(docs_subfolder)
                except:
                    pass  # the subfolder may already exist
                file_ref.file_path = storage.sub_path_join(docs_subfolder, file_name)
                storage.write_file(file_ref.file_path, uploaded_file, uploaded_file.size)
                file_ref.file_created = True
                file_ref.stored_time = datetime.datetime.utcnow()
                file_ref.save()
                return {'file_ref_id': file_ref.pk}
        return str(value)
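# A hedged sketch of the zip name the large-file branch above generates: the
# current UTC timestamp with '.', ':' and ' ' replaced by underscores so the
# resulting file name is safe on common file systems. Output below is
# illustrative; the exact timestamp varies per call.
import datetime
import os

def export_file_name(original_name):
    time_part = str(datetime.datetime.utcnow()).replace('.', '_').replace(':', '_').replace(' ', '_')
    return f'doc_export_{os.path.splitext(original_name)[0]}_{time_part}.zip'

print(export_file_name('contracts.csv'))
# e.g. doc_export_contracts_2024-01-02_03_04_05_678901.zip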
def normalize(task_id, key, value):
    try:
        json.dumps(value)
        return value
    except TypeError:
        if isinstance(value, models.Model):
            return SimpleObjectSerializer().serialize([value]).pop()
        elif isinstance(value, QuerySet):
            return SimpleObjectSerializer().serialize(value)
        elif isinstance(value, (dict, list, tuple, set)):
            return pre_serialize(task_id, key, value)
        elif isinstance(value, UploadedFile):
            uploaded_file = value  # type: UploadedFile
            cache_key = str(task_id) + '__' + str(key) if key else str(task_id)
            DbCache.put_to_db(cache_key, uploaded_file.read())
            return {'file_name': uploaded_file.name, 'cache_key': cache_key}
        return str(value)
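# A minimal sketch of the "probe with json.dumps" pattern both normalize()
# variants rely on: values that already serialize to JSON pass through
# unchanged, everything else drops into type-specific handling.
import json

def is_json_serializable(value):
    try:
        json.dumps(value)
        return True
    except TypeError:
        return False

assert is_json_serializable({'a': [1, 2]})
assert not is_json_serializable({1, 2})  # sets are not JSON-serializable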
def cache_geo_config():
    geo_config = {}
    for name, pk, priority in GeoEntity.objects.values_list('name', 'pk', 'priority'):
        entity = dict_entities.entity_config(pk, name, priority or 0, name_is_alias=True)
        geo_config[pk] = entity
    for alias_id, alias_text, alias_type, entity_id, alias_lang \
            in GeoAlias.objects.values_list('pk', 'alias', 'type', 'entity', 'locale'):
        entity = geo_config[entity_id]
        if entity:
            is_abbrev = alias_type.startswith('iso') or alias_type.startswith('abbrev')
            dict_entities.add_aliases_to_entity(entity,
                                                aliases_csv=alias_text,
                                                language=alias_lang,
                                                is_abbreviation=is_abbrev,
                                                alias_id=alias_id)
    res = list(geo_config.values())
    DbCache.put_to_db(CACHE_KEY_GEO_CONFIG, res)
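# The alias-type heuristic from cache_geo_config(), extracted as a standalone
# check: ISO-code types and explicit abbreviations count as abbreviations,
# anything else is a plain-language alias. The type strings below are
# illustrative examples, not an exhaustive list of values stored in GeoAlias.
def is_abbreviation_type(alias_type):
    return alias_type.startswith('iso') or alias_type.startswith('abbrev')

assert is_abbreviation_type('iso_3166_3')
assert is_abbreviation_type('abbreviation')
assert not is_abbreviation_type('synonym')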
def cache_term_stems(project_id=None):
    """Build the term-stem cache, scoped to a project's own term set when one is configured."""
    term_stems = {}
    terms_qs = Term.objects
    key = CACHE_KEY_TERM_STEMS
    if project_id is not None:
        qs = ProjectTermConfiguration.objects.filter(project_id=project_id)
        if qs.exists():
            terms_qs = qs.last().terms
            key = CACHE_KEY_TERM_STEMS_PROJECT_PTN.format(project_id)
    for t, pk in terms_qs.values_list('term', 'pk'):
        stemmed_term = ' %s ' % ' '.join(get_stems(t))
        stemmed_item = term_stems.get(stemmed_term, [])
        stemmed_item.append([t, pk])
        term_stems[stemmed_term] = stemmed_item
    for item in term_stems:
        term_stems[item] = dict(values=term_stems[item],
                                length=len(term_stems[item]))
    DbCache.put_to_db(key, term_stems)
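# A standalone sketch of the cache-key fallback in cache_term_stems(project_id):
# a project with its own term configuration gets a per-project key, everything
# else shares the global key. The pattern constant below is an assumed shape
# for CACHE_KEY_TERM_STEMS_PROJECT_PTN; only the selection logic is the point.
_GLOBAL_KEY = 'term_stems'
_PROJECT_KEY_PTN = 'term_stems_project_{}'

def select_cache_key(project_id=None, has_project_config=False):
    if project_id is not None and has_project_config:
        return _PROJECT_KEY_PTN.format(project_id)
    return _GLOBAL_KEY

assert select_cache_key() == 'term_stems'
assert select_cache_key(42, True) == 'term_stems_project_42'
assert select_cache_key(42, False) == 'term_stems'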