コード例 #1
0
def _get_cached_conc(corp, subchash, q, pid_dir, minsize):
    """
    Loads a concordance from cache.

    Query prefixes q[:i] are tried from the longest candidate down to a
    single operation (only q[:1] is tried if the query contains a shuffle
    sequence); the first prefix with a usable cache file is returned.
    Broken or dead cache records found along the way are removed.

    arguments:
    corp -- a corpus object (its 'corpname' is logged; it is passed to PyConc)
    subchash -- a subcorpus hash; together with a query prefix it forms the cache key
    q -- a sequence of query operations (strings); converted to a tuple
    pid_dir -- a directory for PID files; created with mode 0o775 if missing
    minsize -- passed through to _wait_for_conc and _is_conc_alive

    returns:
    a 2-tuple (i, conc) where q[:i] is the cached prefix and conc the PyConc
    instance loaded from it; (0, None) if no usable cache record was found
    """
    start_time = time.time()
    q = tuple(q)
    if not os.path.isdir(pid_dir):
        os.makedirs(pid_dir, mode=0o775)

    cache_map = plugins.get('conc_cache').get_mapping(corp)
    cache_map.refresh_map()
    if _contains_shuffle_seq(q):
        srch_from = 1
    else:
        srch_from = len(q)

    ans = (0, None)
    for i in range(srch_from, 0, -1):
        sub_q = q[:i]
        cachefile = cache_map.cache_file_path(subchash, sub_q)
        if not cachefile:
            continue
        pidfile = cache_map.get_stored_pidfile(subchash, sub_q)
        # NOTE(review): the full query is passed here although cachefile and
        # pidfile belong to sub_q -- confirm against _wait_for_conc whether
        # q[:i] is the intended value.
        _wait_for_conc(corp=corp,
                       q=q,
                       subchash=subchash,
                       cachefile=cachefile,
                       cache_map=cache_map,
                       pidfile=pidfile,
                       minsize=minsize)
        if not os.path.exists(cachefile):  # broken cache
            # The record being invalidated is the one looked up above,
            # i.e. the one keyed by the prefix, not by the whole query.
            cache_map.del_entry(subchash, sub_q)
            try:
                os.remove(pidfile)
            except OSError:
                pass  # best-effort cleanup; the file may already be gone
            continue
        conccorp = corp
        for qq in reversed(sub_q):  # find the right main corp, if aligned
            if qq.startswith('x-'):
                conccorp = manatee.Corpus(qq[2:])
                break
        conc = PyConc(conccorp, 'l', cachefile, orig_corp=corp)
        if not _is_conc_alive(pidfile, minsize) and not conc.finished():
            # unfinished and dead concordance
            cache_map.del_entry(subchash, sub_q)
            for path in (cachefile, pidfile):
                try:
                    os.remove(path)
                except OSError:
                    pass  # best-effort cleanup; the file may already be gone
            continue
        ans = (i, conc)
        break
    logging.getLogger(__name__).debug(
        'get_cached_conc(%s, [%s]) -> %s, %01.4f',
        corp.corpname, ','.join(q), 'hit' if ans[1] else 'miss',
        time.time() - start_time)
    return ans
コード例 #2
0
ファイル: conclib.py プロジェクト: anukat2015/kontext
def _get_cached_conc(corp, subchash, q, pid_dir, minsize):
    """
    Loads a concordance from cache.

    Query prefixes q[:i] are tried from the longest candidate down to a
    single operation (only q[:1] is tried if the query contains a shuffle
    sequence); the first prefix with a usable cache file is returned.
    Broken or dead cache records found along the way are removed.

    arguments:
    corp -- a corpus object (its 'corpname' is logged; it is passed to PyConc)
    subchash -- a subcorpus hash; together with a query prefix it forms the cache key
    q -- a sequence of query operations (strings); converted to a tuple
    pid_dir -- a directory for PID files; created with mode 0o775 if missing
    minsize -- passed through to _wait_for_conc and _is_conc_alive

    returns:
    a 2-tuple (i, conc) where q[:i] is the cached prefix and conc the PyConc
    instance loaded from it; (0, None) if no usable cache record was found
    """
    start_time = time.time()
    q = tuple(q)
    if not os.path.isdir(pid_dir):
        os.makedirs(pid_dir, mode=0o775)

    cache_map = plugins.get('conc_cache').get_mapping(corp)
    cache_map.refresh_map()
    if _contains_shuffle_seq(q):
        srch_from = 1
    else:
        srch_from = len(q)

    ans = (0, None)
    for i in range(srch_from, 0, -1):
        sub_q = q[:i]
        cachefile = cache_map.cache_file_path(subchash, sub_q)
        if not cachefile:
            continue
        pidfile = cache_map.get_stored_pidfile(subchash, sub_q)
        # NOTE(review): the full query is passed here although cachefile and
        # pidfile belong to sub_q -- confirm against _wait_for_conc whether
        # q[:i] is the intended value.
        _wait_for_conc(corp=corp, q=q, subchash=subchash, cachefile=cachefile,
                       cache_map=cache_map, pidfile=pidfile, minsize=minsize)
        if not os.path.exists(cachefile):  # broken cache
            # Invalidate the record that was actually looked up (the prefix),
            # not the record of the whole query.
            cache_map.del_entry(subchash, sub_q)
            try:
                os.remove(pidfile)
            except OSError:
                pass  # best-effort cleanup; the file may already be gone
            continue
        conccorp = corp
        for qq in reversed(sub_q):  # find the right main corp, if aligned
            if qq.startswith('x-'):
                conccorp = manatee.Corpus(qq[2:])
                break
        conc = PyConc(conccorp, 'l', cachefile, orig_corp=corp)
        if not _is_conc_alive(pidfile, minsize) and not conc.finished():
            # unfinished and dead concordance
            cache_map.del_entry(subchash, sub_q)
            for path in (cachefile, pidfile):
                try:
                    os.remove(path)
                except OSError:
                    pass  # best-effort cleanup; the file may already be gone
            continue
        ans = (i, conc)
        break
    logging.getLogger(__name__).debug(
        'get_cached_conc(%s, [%s]) -> %s, %01.4f',
        corp.corpname, ','.join(q), 'hit' if ans[1] else 'miss',
        time.time() - start_time)
    return ans
コード例 #3
0
ファイル: conclib.py プロジェクト: mirko-vogel/kontext
def _get_async_conc(corp, user_id, q, save, subchash, samplesize, fullsize,
                    minsize):
    """
    Starts a background concordance calculation using the backend configured
    under ('global', 'calc_backend'), waits via _wait_for_conc and returns
    a PyConc instance opened on the respective cache file.

    Note: 'save' argument is present because of bonito-open-3.45.11 compatibility but it is
    currently not used ----- TODO remove it

    arguments:
    corp -- a corpus object ('corpname' and optional 'subcname' are read)
    user_id -- passed to the calculation backend
    q -- a query representation
    save -- not used (see the note above)
    subchash -- a subcorpus hash; together with q it forms the cache key
    samplesize -- passed to the calculation backend
    fullsize -- not used by this function
    minsize -- passed to _wait_for_conc

    returns:
    a PyConc instance based on the cache file path stored for (subchash, q)

    raises:
    ValueError -- if the configured backend is neither 'multiprocessing' nor 'celery'
    Any exception raised by _wait_for_conc is re-raised after the task has
    been cancelled via _cancel_async_task.
    """
    backend, conf = settings.get_full('global', 'calc_backend')
    if backend == 'multiprocessing':
        from concworker import mp
        mp.create_task(user_id, corp, subchash, q, samplesize).start()
    elif backend == 'celery':
        import task
        app = task.get_celery_app(conf['conf'])
        ans = app.send_task(
            'worker.conc_register',
            (user_id, corp.corpname, getattr(corp, 'subcname',
                                             None), subchash, q, samplesize))
        ans.get()  # = wait for task registration
    else:
        raise ValueError('Unknown concordance calculation backend: %s' %
                         (backend, ))

    cache_map = plugins.runtime.CONC_CACHE.instance.get_mapping(corp)
    try:
        _wait_for_conc(cache_map=cache_map,
                       subchash=subchash,
                       q=q,
                       minsize=minsize)
    except Exception:
        _cancel_async_task(cache_map, subchash, q)
        # bare raise keeps the original traceback intact
        raise
    return PyConc(corp, 'l', cache_map.cache_file_path(subchash, q))
コード例 #4
0
def _get_async_conc(corp, user_id, q, save, subchash, samplesize, fullsize,
                    minsize):
    """
    Starts a background concordance calculation using the backend configured
    under ('corpora', 'conc_calc_backend'), receives the cache file and PID
    file paths from the backend's notifier, waits via _wait_for_conc and
    returns a PyConc instance opened on the cache file.

    Note: 'save' argument is present because of bonito-open-3.45.11 compatibility but it is
    currently not used ----- TODO remove it

    arguments:
    corp -- a corpus object ('corpname' and optional 'subcname' are read)
    user_id -- passed to the celery task (not used by the multiprocessing backend)
    q -- a query representation
    save -- not used (see the note above)
    subchash -- a subcorpus hash
    samplesize -- passed to the calculation backend
    fullsize -- not used by this function
    minsize -- passed to _wait_for_conc

    returns:
    a PyConc instance based on the received cache file path

    raises:
    ValueError -- if the configured backend is neither 'multiprocessing' nor 'celery'
    RuntimeError -- if the cache file does not exist after waiting
    Any exception raised while waiting is re-raised after the PID file has
    been removed.
    """
    backend, conf = settings.get_full('corpora', 'conc_calc_backend')
    if backend == 'multiprocessing':
        from concworker.default import BackgroundCalc, NotifierFactory
        receiver, sender = NotifierFactory()()
        calc = BackgroundCalc(notification_sender=sender)
        proc = Process(target=calc, args=(
            corp,
            subchash,
            q,
            samplesize,
        ))
        proc.start()
    elif backend == 'celery':
        from concworker.wcelery import NotifierFactory
        import task
        app = task.get_celery_app(conf['conf'])
        res = app.send_task(
            'worker.register',
            (user_id, corp.corpname, getattr(corp, 'subcname',
                                             None), subchash, q, samplesize))
        receiver, sender = NotifierFactory(res)()
    else:
        raise ValueError('Unknown concordance calculation backend: %s' %
                         (backend, ))

    cachefile, pidfile = receiver.receive()
    try:
        _wait_for_conc(corp=corp,
                       q=q,
                       subchash=subchash,
                       cachefile=cachefile,
                       cache_map=plugins.get('conc_cache').get_mapping(corp),
                       pidfile=pidfile,
                       minsize=minsize)
        if not os.path.exists(cachefile):
            raise RuntimeError(
                'Concordance cache file [%s] not created. PID file: %s' %
                (cachefile, pidfile))
    except Exception:
        # Remove the PID file without a prior existence check: another
        # process may delete it in between, and an OSError raised here
        # would otherwise mask the exception being handled.
        try:
            os.remove(pidfile)
        except OSError:
            pass
        # bare raise keeps the original traceback intact
        raise
    return PyConc(corp, 'l', cachefile)
コード例 #5
0
ファイル: conclib.py プロジェクト: mirko-vogel/kontext
def _get_cached_conc(corp, subchash, q, minsize):
    """
    Fetches a concordance from cache, reusing as much of the query
    as possible. Prefixes of 'q' (always starting at index zero) are
    probed from the longest candidate to the shortest one so a full
    concordance search can be avoided whenever a cached part exists.

    arguments:
    corp -- a respective manatee.Corpus object
    subchash -- a subcorpus hash (generated by PyConc)
    q -- a query representation list
    minsize -- a minimum concordance size to return immediately (synchronously)

    returns:
    a 2-tuple [an index within 'q' where to start with non-cached results], [a concordance instance]
    """
    started = time.time()
    q = tuple(q)
    log = logging.getLogger(__name__)

    cache_map = plugins.runtime.CONC_CACHE.instance.get_mapping(corp)
    cache_map.refresh_map()
    longest = 1 if _contains_shuffle_seq(q) else len(q)

    hit_size, hit_conc = 0, None
    # walk from the most complete cached operation
    # (e.g. query + filter + sample) towards the bare query
    for size in reversed(range(1, longest + 1)):
        sub_q = q[:size]
        cachefile = cache_map.cache_file_path(subchash, sub_q)
        if not cachefile:
            continue

        try:
            _wait_for_conc(cache_map=cache_map, subchash=subchash, q=sub_q,
                           minsize=minsize)
        except ConcCalculationControlException as ex:
            _cancel_async_task(cache_map, subchash, sub_q)
            log.warning(
                'Removed broken concordance cache record. Original error: %s'
                % (ex, ))
            continue

        # the last 'x-' operation (if any) selects the main corpus of an aligned query
        aligned_op = next(
            (op for op in reversed(sub_q) if op.startswith('x-')), None)
        conccorp = corp if aligned_op is None else manatee.Corpus(aligned_op[2:])

        conc = None
        try:
            unfinished = _min_conc_unfinished(cache_map=cache_map,
                                              subchash=subchash,
                                              q=sub_q,
                                              minsize=minsize)
            if not unfinished:
                conc = PyConc(conccorp, 'l', cachefile, orig_corp=corp)
        except (ConcCalculationControlException,
                manatee.FileAccessError) as ex:
            log.error('Failed to join unfinished calculation: {0}'.format(ex))
            _cancel_async_task(cache_map, subchash, sub_q)
            continue

        hit_size, hit_conc = size, conc
        break

    log.debug(
        'get_cached_conc(%s, [%s]) -> %s, %01.4f' %
        (corp.corpname, ','.join(q), 'hit' if hit_conc else 'miss',
         time.time() - started))
    return hit_size, hit_conc
コード例 #6
0
ファイル: conclib.py プロジェクト: simar0at/kontext
def _get_cached_conc(corp, subchash, q, pid_dir, minsize):
    """
    Loads a concordance from cache.

    Query prefixes q[:i] are tried from the longest candidate down to a
    single operation (only q[:1] is tried if the query contains a shuffle
    sequence); the first prefix with a usable cache file is returned.
    Broken or dead cache records found along the way are removed.

    arguments:
    corp -- a corpus object (its 'corpname' is logged; it is passed to PyConc)
    subchash -- a subcorpus hash; (subchash, query prefix) is the cache map key
    q -- a sequence of query operations (strings); converted to a tuple
    pid_dir -- a directory for PID files; created if missing
    minsize -- passed through to _wait_for_conc and _is_conc_alive

    returns:
    a 2-tuple (i, conc) where q[:i] is the cached prefix and conc the PyConc
    instance loaded from it; (0, None) if no usable cache record was found
    """
    start_time = time.time()
    q = tuple(q)
    if not os.path.isdir(pid_dir):
        os.makedirs(pid_dir)

    cache_map = cache_factory.get_mapping(corp)
    cache_map.refresh_map()
    if _contains_shuffle_seq(q):
        srch_from = 1
    else:
        srch_from = len(q)

    ans = (0, None)
    for i in range(srch_from, 0, -1):
        cache_key = (subchash, q[:i])
        cachefile = cache_map.cache_file_path(subchash, q[:i])
        if not cachefile:
            continue
        pidfile = cache_map[cache_key][2]
        # NOTE(review): the full query is passed here although cachefile and
        # pidfile belong to q[:i] -- confirm against _wait_for_conc whether
        # the prefix is the intended value.
        _wait_for_conc(
            corp=corp,
            q=q,
            subchash=subchash,
            cachefile=cachefile,
            cache_map=cache_map,
            pidfile=pidfile,
            minsize=minsize,
        )
        if not os.path.exists(cachefile):  # broken cache
            # Drop the record that was actually looked up (the prefix key),
            # not the record of the whole query.
            del cache_map[cache_key]
            try:
                os.remove(pidfile)
            except OSError:
                pass  # best-effort cleanup; the file may already be gone
            continue
        conccorp = corp
        for qq in reversed(q[:i]):  # find the right main corp, if aligned
            if qq.startswith("x-"):
                conccorp = manatee.Corpus(qq[2:])
                break
        conc = PyConc(conccorp, "l", cachefile, orig_corp=corp)
        if not _is_conc_alive(pidfile, minsize) and not conc.finished():
            # unfinished and dead concordance
            del cache_map[cache_key]
            for path in (cachefile, pidfile):
                try:
                    os.remove(path)
                except OSError:
                    pass  # best-effort cleanup; the file may already be gone
            continue
        ans = (i, conc)
        break
    logging.getLogger(__name__).debug(
        "get_cached_conc(%s, [%s]) -> %s, %01.4f",
        corp.corpname, ",".join(q), "hit" if ans[1] else "miss",
        time.time() - start_time
    )
    return ans