def cache_geo_config():
    geo_config = {}
    for name, pk, priority in GeoEntity.objects.values_list('name', 'pk', 'priority'):
        entity = dict_entities.DictionaryEntry(pk, name, priority or 0, name_is_alias=True)
        geo_config[pk] = entity

    for alias_id, alias_text, alias_type, entity_id, alias_lang \
            in GeoAlias.objects.values_list('pk', 'alias', 'type', 'entity', 'locale'):
        # use .get() so an alias pointing at a missing entity is skipped instead of raising KeyError
        entity = geo_config.get(entity_id)
        if entity:
            is_abbrev = alias_type.startswith('iso') or alias_type.startswith('abbrev')
            for alias in alias_text.split(';'):
                entity.aliases.append(
                    dict_entities.DictionaryEntryAlias(alias,
                                                       language=alias_lang,
                                                       is_abbreviation=is_abbrev,
                                                       alias_id=alias_id))

    res = list(geo_config.values())
    # DbCache.put_to_db(CACHE_KEY_GEO_CONFIG, res)
    redis.push(CACHE_KEY_GEO_CONFIG, res)
    return res
def create(self, user):
    token_user_key = self.get_token_user_key(user.id)
    token = self.gen_auth_token()
    token_key = self.get_token_key(token)
    # store the pair in both directions - Redis has no efficient lookup by value (token)
    redis.push(token_user_key, token_key, ex=self.key_expiration_time)
    redis.push(token_key, token_user_key, ex=self.key_expiration_time)
    logger.info(f'Cached auth token "{token_user_key}:{token}" for user {user}')
    return token_user_key, self.get_token_object(user, token)
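# Illustrative sketch (not from the source): how the token -> user key pair written above
# could be resolved on an incoming request. The `redis` object used above is a
# project-specific wrapper whose read API is not shown, so the standard `redis-py` client
# stands in here; the 'auth_token:' key prefix is a hypothetical stand-in for get_token_key().
import redis as redis_py

r = redis_py.Redis()

def resolve_user_key(token: str):
    # The forward entry maps token_key -> token_user_key, so a single GET
    # finds the user key without scanning all values.
    token_user_key = r.get(f'auth_token:{token}')  # hypothetical key layout
    if token_user_key is None:
        return None  # token unknown or expired
    return token_user_key.decode()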
def rewrite_cache(cls):
    records = list(BanListRecord.objects.all())
    records_str = pickle.dumps(records)
    # md5 of the pickled records serves as a cheap change marker for cache readers
    m = hashlib.md5()
    m.update(records_str)
    records_checksum = m.hexdigest()
    redis.push(f'{cls.CACHE_KEY}_data', records_str, pickle_value=False)
    redis.push(f'{cls.CACHE_KEY}_hash', records_checksum, pickle_value=False)
    cls.LAST_CACHED_HASH = records_checksum
    return records
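# Illustrative sketch (assumption, not from the source): a consumer can compare the cached
# hash against cls.LAST_CACHED_HASH and only unpickle the (potentially large) record list
# when the checksum has changed. The project wrapper's read API is not shown above, so the
# standard `redis-py` client is used for illustration.
import pickle
import redis as redis_py

r = redis_py.Redis()

def load_if_changed(cls):
    cached_hash = r.get(f'{cls.CACHE_KEY}_hash')
    cached_hash = cached_hash.decode() if cached_hash else None
    if not cached_hash or cached_hash == cls.LAST_CACHED_HASH:
        return None  # cache empty or unchanged - keep using the in-memory copy
    records = pickle.loads(r.get(f'{cls.CACHE_KEY}_data'))
    cls.LAST_CACHED_HASH = cached_hash
    return records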
def cache_court_config():
    res = [dict_entities.DictionaryEntry(
               id=i.id,
               name=i.name,
               priority=0,
               aliases=[dict_entities.DictionaryEntryAlias(a)
                        for a in i.alias.split(';')] if i.alias else [])
           for i in Court.objects.all()]
    # DbCache.put_to_db(CACHE_KEY_COURT_CONFIG, res)
    redis.push(CACHE_KEY_COURT_CONFIG, res)
    return res
def cache_term_stems(project_id=None):
    term_stems = {}
    terms_qs = Term.objects
    key = CACHE_KEY_TERM_STEMS
    if project_id is not None:
        qs = ProjectTermConfiguration.objects.filter(project_id=project_id)
        if qs.exists():
            terms_qs = qs.last().terms
            key = CACHE_KEY_TERM_STEMS_PROJECT_PTN.format(project_id)

    for t, pk in terms_qs.values_list('term', 'pk'):
        stemmed_term = ' %s ' % ' '.join(get_stems(t))
        stemmed_item = term_stems.get(stemmed_term, [])
        stemmed_item.append([t, pk])
        term_stems[stemmed_term] = stemmed_item

    for item in term_stems:
        term_stems[item] = dict(values=term_stems[item],
                                length=len(term_stems[item]))

    # DbCache.put_to_db(key, term_stems)
    redis.push(key, term_stems)
    return term_stems
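# Illustrative sketch (assumption): the cached stems above are padded with spaces
# (' stem '), so a stemmed and space-padded text can be matched with a plain substring
# test. `get_stems` is not shown in the source, so NLTK's PorterStemmer stands in here.
from nltk.stem import PorterStemmer

_stemmer = PorterStemmer()

def find_terms(text: str, term_stems: dict):
    stemmed_text = ' %s ' % ' '.join(_stemmer.stem(w) for w in text.lower().split())
    hits = []
    for stemmed_term, data in term_stems.items():
        if stemmed_term in stemmed_text:
            hits.extend(data['values'])  # [term, pk] pairs cached above
    return hits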
def cache_users(self, users):
    # cache user qs for 5 min
    redis.push(self.users_cache_key, users, ex=self.cached_users_expiration_time)
def process(self, **kwargs):
    search_similar_documents = kwargs['search_similar_documents']
    search_similar_text_units = kwargs['search_similar_text_units']
    project = kwargs['project']
    project_id = project['pk'] if project else None
    unit_type = kwargs['unit_type']
    feature_source = kwargs['feature_source']
    use_tfidf = kwargs['use_tfidf']
    distance_type = kwargs['distance_type']
    similarity_threshold = kwargs['similarity_threshold'] / 100
    self.log_info('Min similarity: {}'.format(similarity_threshold))

    if search_similar_documents:
        engine_class = DocumentSimilarityEngine
    elif search_similar_text_units:
        engine_class = TextUnitSimilarityEngine
    else:
        self.log_error("Classify task target (documents or text units) is not specified.")
        return

    if kwargs['delete']:
        if search_similar_text_units:
            if project_id:
                deleted = TextUnitSimilarity.objects.filter(
                    Q(project_a__id=project_id) | Q(project_b__id=project_id)).delete()
            else:
                deleted = TextUnitSimilarity.objects.all().delete()
        else:
            if project_id:
                deleted = DocumentSimilarity.objects.filter(
                    Q(document_a__project__id=project_id) |
                    Q(document_b__project__id=project_id)).delete()
            else:
                deleted = DocumentSimilarity.objects.all().delete()
        self.log_info('Deleted "{}"'.format(deleted[1]))

    similarity_engine_kwargs = dict(
        project_id=project_id,
        unit_type=unit_type,
        feature_source=feature_source,
        use_tfidf=use_tfidf,
        distance_type=distance_type,
        threshold=similarity_threshold)
    similarity_engine = engine_class(**similarity_engine_kwargs)
    features = similarity_engine.get_features()
    feature_matrix = features.term_frequency_matrix
    feature_records = feature_matrix.shape[0]

    subtasks_args = []
    for block_i_start in range(0, feature_records, similarity_engine.block_step):
        block_i_end = block_i_start + similarity_engine.block_step
        df1_redis_key = f'{self.task.pk}_{block_i_start}_{block_i_end}'
        if not redis.exists(df1_redis_key):
            df1_data = (feature_matrix[block_i_start:block_i_end],
                        features.item_index[block_i_start:block_i_end],
                        features.feature_names)
            redis.push(key=df1_redis_key, value=df1_data, pickle_value=True)
        for block_j_start in range(0, feature_records, similarity_engine.block_step):
            block_j_end = block_j_start + similarity_engine.block_step
            self.log_info(f'Cache data for blocks: '
                          f'{block_i_start}:{block_i_end} - {block_j_start}:{block_j_end}')
            df2_redis_key = f'{self.task.pk}_{block_j_start}_{block_j_end}'
            if not redis.exists(df2_redis_key):
                df2_data = (feature_matrix[block_j_start:block_j_end],
                            features.item_index[block_j_start:block_j_end],
                            features.feature_names)
                redis.push(key=df2_redis_key, value=df2_data, pickle_value=True)
            subtasks_args.append((
                df1_redis_key,
                df2_redis_key,
                search_similar_documents,
                similarity_engine_kwargs,
                project_id))

    self.run_sub_tasks('Calculate similarities for feature_df blocks',
                       self.calc_block_similarity,
                       subtasks_args)
    self.run_after_sub_tasks_finished('Clear redis keys.', self.finalize, [()])
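# Illustrative sketch (assumption): what each calc_block_similarity subtask could do with
# the two cached blocks - load them, compute pairwise cosine similarity, and keep the pairs
# above the threshold. The wrapper's read API is not shown above, so redis-py + pickle
# stand in here; the function name is hypothetical.
import pickle
import numpy as np
import redis as redis_py
from sklearn.metrics.pairwise import cosine_similarity

r = redis_py.Redis()

def calc_block_similarity_sketch(df1_redis_key, df2_redis_key, threshold):
    # each cached value is (feature block, item index slice, feature names), as pushed above
    matrix_a, index_a, _ = pickle.loads(r.get(df1_redis_key))
    matrix_b, index_b, _ = pickle.loads(r.get(df2_redis_key))
    sim = cosine_similarity(matrix_a, matrix_b)  # shape: (len(index_a), len(index_b))
    rows, cols = np.where(sim >= threshold)
    return [(index_a[i], index_b[j], sim[i, j]) for i, j in zip(rows, cols)]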
def cache(self):
    redis.push(self.cache_key, self.value)
def process(self, **kwargs):
    search_similar_documents = kwargs['search_similar_documents']
    search_similar_text_units = kwargs['search_similar_text_units']
    project = kwargs['project']
    project_id = project['pk'] if project else None
    unit_type = kwargs['unit_type']
    feature_source = kwargs['feature_source']
    use_tfidf = kwargs['use_tfidf']
    distance_type = kwargs['distance_type']
    similarity_threshold = kwargs['similarity_threshold'] / 100
    self.log_info('Min similarity: {}'.format(similarity_threshold))

    if search_similar_documents:
        db_model = DocumentSimilarity
        engine_class = DocumentSimilarityEngine
    elif search_similar_text_units:
        db_model = TextUnitSimilarity
        engine_class = TextUnitSimilarityEngine
    else:
        self.log_error("Classify task target (documents or text units) is not specified.")
        return

    if kwargs['delete']:
        # TODO: delete all Similarity db objects OR filter by unit_type/project_id
        deleted = db_model.objects.filter().delete()
        self.log_info('Deleted "{}"'.format(deleted[1]))

    similarity_engine_kwargs = dict(project_id=project_id,
                                    unit_type=unit_type,
                                    feature_source=feature_source,
                                    use_tfidf=use_tfidf,
                                    distance_type=distance_type,
                                    threshold=similarity_threshold)
    similarity_engine = engine_class(**similarity_engine_kwargs)
    feature_df = similarity_engine.get_features().feature_df

    subtasks_args = []
    for block_i_start in range(0, feature_df.shape[0], similarity_engine.block_step):
        for block_j_start in range(0, feature_df.shape[0], similarity_engine.block_step):
            df1 = feature_df.iloc[block_i_start:block_i_start + similarity_engine.block_step, :]
            df1_redis_key = f'{self.task.pk}_{block_i_start}_{block_i_start + similarity_engine.block_step}'
            redis.push(key=df1_redis_key, value=df1, pickle_value=True)

            df2 = feature_df.iloc[block_j_start:block_j_start + similarity_engine.block_step, :]
            df2_redis_key = f'{self.task.pk}_{block_j_start}_{block_j_start + similarity_engine.block_step}'
            redis.push(key=df2_redis_key, value=df2, pickle_value=True)

            subtasks_args.append((df1_redis_key,
                                  df2_redis_key,
                                  search_similar_documents,
                                  similarity_engine_kwargs))

    self.run_sub_tasks('Calculate similarities for feature_df blocks',
                       self.calc_block_similarity,
                       subtasks_args)
    self.run_after_sub_tasks_finished('Clear redis keys.', self.finalize, [()])
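# Illustrative sketch (assumption): the 'Clear redis keys.' finalize step could drop every
# per-task block key by prefix once the subtasks are done. The project wrapper's delete API
# is not shown above, so redis-py's scan_iter/delete is used here; the key pattern mirrors
# the f'{task.pk}_{start}_{end}' layout used when the blocks were pushed.
import redis as redis_py

r = redis_py.Redis()

def finalize_sketch(task_pk):
    for key in r.scan_iter(match=f'{task_pk}_*'):
        r.delete(key)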