def __call__(self, subchash, query: Tuple[str, ...], samplesize: int):
    """
    Calculate (in a background worker) the concordance for a full chain of
    query operations and store each intermediate result into the conc. cache.

    arguments:
    subchash -- a subcorpus identifier hash (or None) -- presumably; verify against callers
    query -- a chain of concordance operations; the first item is the initial query,
             the following ones are derived actions (sample, sort, ...)
    samplesize -- a sample size passed to compute_conc (semantics defined there)

    On any failure, the affected cache records are marked as erroneous via
    _mark_calc_states_err() (with the index of the failed operation) and the
    method returns early.
    """
    try:
        # reuse the longest already-cached prefix of the operation chain, if any
        calc_from, conc = find_cached_conc_base(self.corpus_obj, subchash, query, minsize=0)
        if isinstance(
                conc, InitialConc):  # we have nothing, let's start with the 1st operation only
            calc_status = self.cache_map.add_to_map(subchash, query[:1], CalcStatus(), overwrite=True)
            conc = self.compute_conc(self.corpus_obj, query[:1], samplesize)
            conc.sync()  # wait for the asynchronous manatee calculation to finish
            conc.save(calc_status.cachefile)
            self.cache_map.update_calc_status(
                subchash, query[:1], readable=True, finished=True, concsize=conc.size())
            calc_from = 1
    except Exception as ex:
        logging.getLogger(__name__).error(ex)
        # index 0: the initial operation failed
        self._mark_calc_states_err(subchash, query, 0, ex)
        return
    # save additional concordance actions to cache (e.g. sample, aligned corpus without a query,...)
    for act in range(calc_from, len(query)):
        try:
            command, args = query[act][0], query[act][1:]
            conc.exec_command(command, args)
            if command in 'gae':  # user specific/volatile actions, cannot save
                raise NotImplementedError(f'Cannot run command {command} in background')  # TODO
            status = self.cache_map.add_to_map(subchash, query[:act + 1], CalcStatus(), overwrite=True)
            conc.save(status.cachefile)
            self.cache_map.update_calc_status(
                subchash, query[:act + 1], readable=True, finished=True, concsize=conc.size())
        except Exception as ex:
            # mark the failed operation (and, by implication, its dependents) as erroneous
            self._mark_calc_states_err(subchash, query, act, ex)
            logging.getLogger(__name__).error(ex)
            return
def _get_entry(self, subchash, q):
    """
    Load a stored cache record (a [size, CalcStatus, q0hash] triple) for the
    given subcorpus hash and query chain; return None when missing or when the
    stored status part is not a dict (i.e. an unsupported legacy value).
    """
    record = self._db.hash_get(self._mk_key(), _uniqname(subchash, q))
    if not record:
        return None
    if type(record[1]) is not dict:
        # unsupported/legacy status format - treat as a cache miss
        return None
    return [record[0], CalcStatus().update(record[1]), record[2]]
def _get_entry(self, subchash, q) -> Union[CachedConcInfo, None]:
    """
    Fetch a cached conc. record for the given subcorpus hash and query chain.

    returns:
    a (size, CalcStatus, q0hash) tuple, or None when there is no record or the
    stored status part is not a dict (unsupported format)
    """
    stored = self._db.hash_get(self._mk_key(), _uniqname(subchash, q))
    if not stored:
        return None
    raw_status = stored[1]
    if type(raw_status) is not dict:
        # unsupported status format - report a cache miss
        return None
    return stored[0], CalcStatus(**raw_status), stored[2]
def add_to_map(self, subchash: Optional[str], query: Tuple[str, ...], calc_status: CalcStatus, overwrite: bool = False) -> CalcStatus:
    """
    Add a cache map record for the provided query chain (or return an existing one).

    arguments:
    subchash -- a subcorpus identifier hash (or None)
    query -- a chain of concordance operations
    calc_status -- a status record to store when no record exists (or when overwrite is set)
    overwrite -- if True, replace any existing record with calc_status

    returns:
    the previously stored CalcStatus when one exists and overwrite is False;
    otherwise the passed calc_status with its q0hash and cachefile attributes
    filled in (the cache file path is available via the 'cachefile' attribute)
    """
    prev_status = self._get_entry(subchash, query)
    if prev_status and not overwrite:
        return prev_status
    # q0hash ties this record to the initial operation of the chain so that
    # all derived records can later be located (and removed) together
    calc_status.q0hash = _uniqname(subchash, query[:1])
    calc_status.cachefile = self._create_cache_file_path(subchash, query)
    self._set_entry(subchash, query, calc_status)
    return calc_status
def _get_bg_conc(corp: manatee.Corpus, user_id: int, q: Tuple[str, ...], subchash: Optional[str],
                 samplesize: int, calc_from: int, minsize: int) -> Union[PyConc, EmptyConc]:
    """
    Schedule a background calculation of the concordance operations q[calc_from:]
    and return either a loaded PyConc (when the data became available within the
    wait window) or an EmptyConc the client is expected to keep polling.

    arguments:
    calc_from - from which operation idx (inclusive) we have to calculate respective results
    """
    cache_map = plugins.runtime.CONC_CACHE.instance.get_mapping(corp)
    # let's create cache records of the operations we'll have to perform
    if calc_from < len(q):
        for i in range(calc_from, len(q)):
            # BUGFIX: add_to_map() takes a CalcStatus as its 3rd positional argument
            # and returns a CalcStatus; the previous call passed an extra positional 0
            # plus a duplicate 'calc_status' keyword (a TypeError) and unpacked the
            # returned status as a 2-tuple
            calc_status = cache_map.add_to_map(subchash, q[:i + 1], CalcStatus(), overwrite=True)
            if os.path.isfile(calc_status.cachefile):
                # the record was overwritten, so an existing file is now unbound - drop it
                del_silent(calc_status.cachefile)
                logging.getLogger(__name__).warning(
                    f'Removed unbound conc. cache file {calc_status.cachefile}')
    app = bgcalc.calc_backend_client(settings)
    app.send_task('conc_sync_calculate',
                  (user_id, corp.corpname, getattr(corp, 'subcname', None), subchash, q, samplesize),
                  time_limit=TASK_TIME_LIMIT)
    # for smaller concordances/corpora there is a chance the data
    # is ready in a few seconds - let's try this:
    conc_avail = wait_for_conc(cache_map=cache_map, subchash=subchash, q=q, minsize=minsize)
    if conc_avail:
        return PyConc(corp, 'l', cache_map.cache_file_path(subchash, q))
    else:
        # return empty yet unfinished concordance to make the client watch the calculation
        return EmptyConc(corp, cache_map.cache_file_path(subchash, q))
def update_calc_status(self, subchash, query, calc_status):
    """
    Merge the provided status data into an existing cache record.

    If no record exists for (subchash, query), this is a no-op. When
    calc_status is None, the stored status is reset to a fresh CalcStatus.
    """
    entry = self._get_entry(subchash, query)
    if not entry:
        return
    size, status, q0hash = entry
    if calc_status is None:
        # no update data provided -> reset the status
        status = CalcStatus()
    else:
        status.update(calc_status)
    self._set_entry(subchash, query, [size, status, q0hash])
def del_full_entry(self, subchash: Optional[str], q: Tuple[str, ...]):
    """
    Remove all cache records derived from the initial operation of the query
    chain q (matched via the records' q0hash), along with any records stored
    in an unsupported (non-dict) format.
    """
    key = self._mk_key()
    q0_hash = _uniqname(subchash, q[:1])
    for entry_key, raw in list(self._db.hash_get_all(key).items()):
        if not raw:
            continue
        if type(raw) is not dict:
            # unsupported record format - drop it and report
            logging.getLogger(__name__).warning(
                'Removed unsupported conc cache value: {}'.format(raw))
            self._db.hash_del(key, entry_key)
        elif CalcStatus(**raw).q0hash == q0_hash:
            # original record's key must be used (entry_key match can be partial);
            # must use direct access here (no del_entry())
            self._db.hash_del(key, entry_key)
def create_new_calc_status(self) -> CalcStatus:
    """Build a fresh calculation status bound to this worker's task id."""
    status = CalcStatus(task_id=self._task_id)
    return status
def _set_entry(self, subchash, q, data: CalcStatus):
    """Serialize the status record and store it under its unique entry key."""
    entry_key = _uniqname(subchash, q)
    self._db.hash_set(self._mk_key(), entry_key, data.to_dict())
def _get_entry(self, subchash, q) -> Union[CalcStatus, None]:
    """
    Load a stored CalcStatus for the given subcorpus hash and query chain;
    return None when the record is missing or not stored as a dict.
    """
    raw = self._db.hash_get(self._mk_key(), _uniqname(subchash, q))
    if not raw or type(raw) is not dict:
        return None
    return CalcStatus(**raw)