Beispiel #1
0
def _get_bg_conc(corp: manatee.Corpus, user_id: int, q: Tuple[str, ...],
                 subchash: Optional[str], samplesize: int, calc_from: int,
                 minsize: int) -> Union[PyConc, EmptyConc]:
    """
    Register the not yet calculated operations in the conc. cache, pass the
    calculation to the background worker and wait for the result.

    arguments:
    corp -- the corpus the concordance is calculated on
    user_id -- user ID forwarded to the background task
    q -- the complete query (a tuple of operations)
    subchash -- subcorpus identifier passed to the cache mapping
    samplesize -- forwarded as-is to the 'conc_sync_calculate' task
    calc_from -- index of the first operation (inclusive) we have to calculate
    minsize -- forwarded as-is to wait_for_conc

    returns:
    PyConc in case wait_for_conc reports the data as available, otherwise
    EmptyConc bound to the same cache file path
    """
    conc_cache = plugins.runtime.CONC_CACHE.instance.get_mapping(corp)
    num_ops = len(q)
    if calc_from < num_ops:
        # one cache record per query prefix q[:calc_from + 1] ... q[:num_ops]
        for prefix_end in range(calc_from + 1, num_ops + 1):
            cachefile, _ = conc_cache.add_to_map(
                subchash, q[:prefix_end], 0, calc_status=CalcStatus())
            if not os.path.isfile(cachefile):
                continue
            # a file existing for a freshly registered record is a leftover
            del_silent(cachefile)
            logging.getLogger(__name__).warning(
                f'Removed unbound conc. cache file {cachefile}')
        backend = bgcalc.calc_backend_client(settings)
        task_args = (user_id, corp.corpname, getattr(corp, 'subcname', None),
                     subchash, q, samplesize)
        backend.send_task('conc_sync_calculate', task_args,
                          time_limit=TASK_TIME_LIMIT)
    # small concordances/corpora may be ready within a few seconds so
    # we give the calculation a chance before answering
    is_ready = wait_for_conc(
        cache_map=conc_cache, subchash=subchash, q=q, minsize=minsize)
    target_path = conc_cache.cache_file_path(subchash, q)
    if is_ready:
        return PyConc(corp, 'l', target_path)
    # an empty, unfinished concordance makes the client watch the calculation
    return EmptyConc(corp, target_path)
Beispiel #2
0
def _get_async_conc(corp, user_id, q, subchash, samplesize, minsize):
    """
    Send the 'worker.conc_register' task to the calculation backend, wait
    for its answer and then wait for the concordance data.

    arguments:
    corp -- the corpus the concordance is calculated on
    user_id -- user ID forwarded to the task
    q -- the complete query (a tuple of operations)
    subchash -- subcorpus identifier passed to the task and the cache mapping
    samplesize -- forwarded as-is to the task
    minsize -- forwarded as-is to wait_for_conc

    returns:
    PyConc in case wait_for_conc reports the data as available, otherwise
    EmptyConc bound to the same cache file path
    """
    backend = bgcalc.calc_backend_client(settings)
    register_args = (user_id, corp.corpname, getattr(corp, 'subcname', None),
                     subchash, q, samplesize, TASK_TIME_LIMIT)
    # block until the registration task answers (or the wait limit is hit)
    backend.send_task(
        'worker.conc_register', register_args,
        time_limit=CONC_REGISTER_TASK_LIMIT).get(timeout=CONC_REGISTER_WAIT_LIMIT)
    conc_cache = plugins.runtime.CONC_CACHE.instance.get_mapping(corp)
    is_ready = wait_for_conc(
        cache_map=conc_cache, subchash=subchash, q=q, minsize=minsize)
    target_path = conc_cache.cache_file_path(subchash, q)
    if is_ready:
        return PyConc(corp, 'l', target_path)
    return EmptyConc(corp, target_path)
Beispiel #3
0
def _get_bg_conc(corp: AbstractKCorpus, user_id: int, q: Tuple[str, ...],
                 subchash: Optional[str], samplesize: int, calc_from: int,
                 minsize: int) -> Union[PyConc, InitialConc]:
    """
    Register the not yet calculated operations in the conc. cache, pass the
    calculation to the background worker and wait for the result. In case
    the cache already holds an unfinished status for the query, no new task
    is sent.

    arguments:
    corp -- the corpus the concordance is calculated on
    user_id -- user ID forwarded to the background task
    q -- the complete query (a tuple of operations)
    subchash -- subcorpus identifier passed to the cache mapping
    samplesize -- forwarded as-is to the 'conc_sync_calculate' task
    calc_from -- index of the first operation (inclusive) we have to calculate
    minsize -- forwarded as-is to wait_for_conc

    returns:
    PyConc in case wait_for_conc reports the data as available, otherwise
    InitialConc (bound either to the running calculation's cache file
    or to the readable cache path of the query)
    """
    conc_cache = plugins.runtime.CONC_CACHE.instance.get_mapping(corp)

    running = conc_cache.get_calc_status(subchash, q)
    if running and not running.finished:
        # the calc is already running, the client has to wait and check regularly
        return InitialConc(corp, running.cachefile)

    num_ops = len(q)
    if calc_from < num_ops:
        # one (overwritten) cache record per query prefix we have to calculate
        for prefix_end in range(calc_from + 1, num_ops + 1):
            status = conc_cache.add_to_map(
                subchash, q[:prefix_end], ConcCacheStatus(), overwrite=True)
            if not os.path.isfile(status.cachefile):
                continue
            # a valid file would have produced a higher calc_from
            del_silent(status.cachefile)
            logging.getLogger(__name__).warning(
                f'Removed unbound conc. cache file {status.cachefile}')
        backend = bgcalc.calc_backend_client(settings)
        task_args = (user_id, corp.corpname, getattr(corp, 'subcname', None),
                     subchash, q, samplesize)
        backend.send_task('conc_sync_calculate', object.__class__, task_args,
                          time_limit=TASK_TIME_LIMIT)
    # small concordances/corpora may be ready within a few seconds so
    # we give the calculation a chance before answering
    is_ready = wait_for_conc(
        cache_map=conc_cache, subchash=subchash, q=q, minsize=minsize)
    target_path = conc_cache.readable_cache_path(subchash, q)
    if is_ready:
        return PyConc(corp, 'l', target_path)
    # an empty, unfinished concordance makes the client watch the calculation
    return InitialConc(corp, target_path)
Beispiel #4
0
def _get_async_conc(corp, user_id, q, subchash, samplesize, minsize):
    """
    Make sure the concordance calculation is registered (the 'conc_register'
    task is sent only if the cache holds no status for the query or the
    stored status carries an error) and wait for the concordance data.

    arguments:
    corp -- the corpus the concordance is calculated on
    user_id -- user ID forwarded to the task
    q -- the complete query (a tuple of operations)
    subchash -- subcorpus identifier passed to the task and the cache mapping
    samplesize -- forwarded as-is to the task
    minsize -- forwarded as-is to wait_for_conc

    returns:
    PyConc in case wait_for_conc reports the data as available, otherwise
    InitialConc bound to the same readable cache path
    """
    conc_cache = plugins.runtime.CONC_CACHE.instance.get_mapping(corp)
    known = conc_cache.get_calc_status(subchash, q)
    if not known or known.error:
        backend = bgcalc.calc_backend_client(settings)
        register_args = (user_id, corp.corpname,
                         getattr(corp, 'subcname', None), subchash, q,
                         samplesize, TASK_TIME_LIMIT)
        # block until the registration task answers (or the wait limit is hit)
        backend.send_task(
            'conc_register', object.__class__, register_args,
            time_limit=CONC_REGISTER_TASK_LIMIT).get(timeout=CONC_REGISTER_WAIT_LIMIT)
    is_ready = wait_for_conc(
        cache_map=conc_cache, subchash=subchash, q=q, minsize=minsize)
    target_path = conc_cache.readable_cache_path(subchash, q)
    if is_ready:
        return PyConc(corp, 'l', target_path)
    return InitialConc(corp, target_path)
Beispiel #5
0
def get_conc(corp,
             user_id,
             q: Tuple[str, ...] = None,
             fromp=0,
             pagesize=0,
             asnc=0,
             save=0,
             samplesize=0) -> Union[manatee.Concordance, EmptyConc]:
    """
    Get/calculate a concordance for the query 'q'.

    The function proceeds as follows:
    1) an empty 'q' produces a finished EmptyConc right away
    2) if 'save' is set, find_cached_conc_base is asked for an already
       calculated base of the query; it provides 'calc_from' (the index of
       the first operation which still has to be applied) and possibly
       a concordance object
    3) if _should_be_bg_query says so, the whole calculation is passed to
       _get_bg_conc
    4) otherwise, in case no concordance has been found in cache, the first
       operation is calculated either via _get_async_conc (only if 'asnc' is
       set and the query has a single operation) or via _get_sync_conc; the
       remaining operations q[calc_from:] are then applied one by one via
       conc.exec_command and (if 'save' is still set) stored to cache

    arguments:
    corp -- a corpus object (described as manatee.Corpus by the original docs);
            its 'subchash' attribute is used if present
    user_id -- database user ID, forwarded to the background/async helpers
    q -- a tuple/list containing an extended query representation
         (e.g. ['aword,[] within <doc id="foo" />', 'p0 ...']); the first
         character of each item is the command, the rest are its arguments
         NOTE(review): the default is None although the annotation is not Optional
    fromp -- a page offset; along with 'pagesize' it determines 'minsize'
    pagesize -- a page size (in lines, related to 'fromp')
    asnc -- if set, the background/asynchronous calculation paths are allowed;
            the value is forced to 0 when 'save' is not set
    save -- specifies whether to use the caching mechanism; the value is
            switched off internally once the whole query is found in cache
            or once a 'g', 'a' or 'e' command is applied
    samplesize -- forwarded as-is to the calculation helpers

    returns:
    a concordance object produced by one of the helpers (or found in cache)

    raises:
    ConcCalculationStatusException -- if a cache record of an applied operation
    is reported as unfinished and waiting for it fails
    """
    if not q:
        return EmptyConc(corp=corp, finished=True)
    # complete bg calc. without continuous data fetching => must accept 0
    if _should_be_bg_query(corp, q, asnc):
        minsize = 0
    elif len(
            q
    ) > 1 or asnc == 0:  # conc with additional ops. needs whole concordance
        minsize = -1
    else:
        minsize = fromp * pagesize  # happy case for a user
    subchash = getattr(corp, 'subchash', None)
    # the default result; replaced below by a cached or calculated concordance
    conc = EmptyConc(corp=corp, finished=True)
    # try to locate concordance in cache
    if save:
        calc_from, conc = find_cached_conc_base(corp, subchash, q, minsize)
        # the whole query has been found => there is nothing new to store
        if calc_from == len(q):
            save = 0
        if not conc and q[0][0] == 'R':  # online sample
            # look for the same query without the leading 'R' of the first operation
            q_copy = list(q)
            q_copy[0] = q[0][1:]
            q_copy = tuple(q_copy)
            # NOTE(review): neither 't' nor 'fullsize' is used anywhere else
            # in this function
            t, c = find_cached_conc_base(corp, subchash, q_copy, -1)
            if c:
                fullsize = c.fullsize()  # TODO fullsize ???
    else:
        # no caching => everything after the first operation is applied here
        # and the asynchronous mode is switched off
        calc_from = 1
        asnc = 0

    # move mid-sized aligned corpora or large non-aligned corpora to background
    # (note that 'asnc' may have been reset to 0 above so the answer can
    # differ from the one obtained when 'minsize' was selected)
    if _should_be_bg_query(corp, q, asnc):
        # overrides the value selected at the beginning of the function
        minsize = fromp * pagesize
        conc = _get_bg_conc(corp=corp,
                            user_id=user_id,
                            q=q,
                            subchash=subchash,
                            samplesize=samplesize,
                            calc_from=calc_from,
                            minsize=minsize)
    else:
        worker = GeneralWorker()
        if isinstance(conc, EmptyConc):
            # nothing usable has been found in cache; the helpers below produce
            # the concordance and the loop further down starts at operation 1
            calc_from = 1
            # use Manatee asynchronous conc. calculation (= show 1st page once it's avail.)
            if asnc and len(q) == 1:
                conc = _get_async_conc(corp=corp,
                                       user_id=user_id,
                                       q=q,
                                       subchash=subchash,
                                       samplesize=samplesize,
                                       minsize=minsize)

            # do the calc here and return (OK for small to mid sized corpora without alignments)
            else:
                conc = _get_sync_conc(worker=worker,
                                      corp=corp,
                                      q=q,
                                      save=save,
                                      subchash=subchash,
                                      samplesize=samplesize)
        # save additional concordance actions to cache (e.g. sample)
        for act in range(calc_from, len(q)):
            # the first character is the command, the rest are its arguments
            command, args = q[act][0], q[act][1:]
            conc.exec_command(command, args)
            # NOTE(review): this is a substring test against the string 'gae';
            # once 'save' is switched off here it stays off for the rest of the loop
            if command in 'gae':  # user specific/volatile actions, cannot save
                save = 0
            if save:
                cache_map = plugins.runtime.CONC_CACHE.instance.get_mapping(
                    corp)
                cachefile, stored_status = cache_map.add_to_map(
                    subchash,
                    q[:act + 1],
                    conc.size(),
                    calc_status=worker.create_new_calc_status())
                # a returned unfinished status => wait for the record to become
                # ready instead of writing the file here
                if stored_status and not stored_status.finished:
                    ready = wait_for_conc(cache_map=cache_map,
                                          subchash=subchash,
                                          q=q[:act + 1],
                                          minsize=-1)
                    if not ready:
                        raise ConcCalculationStatusException(
                            'Wait for concordance operation failed')
                # no status returned => write the data and mark the record as finished
                elif not stored_status:
                    conc.save(cachefile)
                    cache_map.update_calc_status(subchash,
                                                 q[:act + 1],
                                                 finished=True,
                                                 concsize=conc.size())
    return conc