def _get_async_conc(corp, user_id, q, save, subchash, samplesize, fullsize, minsize):
    """
    Note: the 'save' argument is present only for bonito-open-3.45.11 compatibility;
    it is currently unused. TODO: remove it
    """
    backend, conf = settings.get_full('global', 'calc_backend')
    if backend == 'multiprocessing':
        from concworker import mp
        mp.create_task(user_id, corp, subchash, q, samplesize).start()
    elif backend == 'celery':
        import task
        app = task.get_celery_app(conf['conf'])
        ans = app.send_task('worker.conc_register',
                            (user_id, corp.corpname, getattr(corp, 'subcname', None),
                             subchash, q, samplesize))
        ans.get()  # = wait for task registration
    else:
        raise ValueError('Unknown concordance calculation backend: %s' % (backend,))
    cache_map = plugins.runtime.CONC_CACHE.instance.get_mapping(corp)
    try:
        _wait_for_conc(cache_map=cache_map, subchash=subchash, q=q, minsize=minsize)
    except Exception as e:
        _cancel_async_task(cache_map, subchash, q)
        raise e
    return PyConc(corp, 'l', cache_map.cache_file_path(subchash, q))
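
# A minimal sketch of the configuration contract the dispatch above relies on;
# the backend value and path below are hypothetical, not taken from a real deployment:
#
#   backend, conf = settings.get_full('global', 'calc_backend')
#   # e.g. backend == 'celery', conf == {'conf': '/opt/kontext/conf/celeryconfig.py'}
#   # conf['conf'] is the value handed over to task.get_celery_app()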

def _get_async_conc(corp, user_id, q, save, subchash, samplesize, fullsize, minsize):
    """
    Note: the 'save' argument is present only for bonito-open-3.45.11 compatibility;
    it is currently unused. TODO: remove it
    """
    backend, conf = settings.get_full('corpora', 'conc_calc_backend')
    if backend == 'multiprocessing':
        from concworker.default import BackgroundCalc, NotifierFactory
        receiver, sender = NotifierFactory()()
        calc = BackgroundCalc(notification_sender=sender)
        proc = Process(target=calc, args=(corp, subchash, q, samplesize))
        proc.start()
    elif backend == 'celery':
        from concworker.wcelery import NotifierFactory
        import task
        app = task.get_celery_app(conf['conf'])
        res = app.send_task('worker.register',
                            (user_id, corp.corpname, getattr(corp, 'subcname', None),
                             subchash, q, samplesize))
        receiver, sender = NotifierFactory(res)()
    else:
        raise ValueError('Unknown concordance calculation backend: %s' % (backend,))
    cachefile, pidfile = receiver.receive()
    try:
        _wait_for_conc(corp=corp, q=q, subchash=subchash, cachefile=cachefile,
                       cache_map=plugins.get('conc_cache').get_mapping(corp),
                       pidfile=pidfile, minsize=minsize)
        if not os.path.exists(cachefile):
            raise RuntimeError('Concordance cache file [%s] not created. PID file: %s' % (
                cachefile, pidfile))
    except Exception as e:
        if os.path.exists(pidfile):
            os.remove(pidfile)
        raise e
    return PyConc(corp, 'l', cachefile)
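
# The notifier pair used above appears to follow a small contract: 'sender' is handed
# to the background calculation (BackgroundCalc or the Celery task), while
# receiver.receive() blocks until the calculation announces the (cachefile, pidfile)
# pair it is about to write; the two factories seem to differ only in the transport
# they wrap.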

def calculate_freqs_ct(args):
    """
    Note: this is called by the webserver.
    """
    backend, conf = settings.get_full('global', 'calc_backend')
    if backend == 'celery':
        import task
        try:
            app = task.get_celery_app(conf['conf'])
            res = app.send_task('worker.calculate_freqs_ct', args=(args.to_dict(),),
                                time_limit=TASK_TIME_LIMIT)
            calc_result = res.get()
        except Exception as ex:
            if is_celery_user_error(ex):
                raise UserActionException(ex.message)
            else:
                raise ex
    elif backend == 'multiprocessing':
        raise NotImplementedError(
            'Multi-processing backend is not yet supported for freq_ct calculation')
    else:
        raise ValueError('Invalid backend')
    return calc_result

def calculate_colls(coll_args):
    """
    Calculates the required collocations based on the passed arguments. The function
    is able to reuse cached values and to utilize the configured backend (either
    Celery or multiprocessing).

    returns:
    a dictionary ready to be used in a respective template (collx.tmpl)
    (keys: Head, Items, cmaxitems, attrname, processing, collstart, lastpage)
    """
    if coll_args.num_lines > 0:
        collstart = 0
        collend = coll_args.num_lines
    else:
        collstart = (int(coll_args.collpage) - 1) * int(coll_args.citemsperpage) + \
            int(coll_args.line_offset)
        collend = collstart + int(coll_args.citemsperpage) + 1
    cache = CollCalcCache(corpname=coll_args.corpname, subcname=coll_args.subcname,
                          subcpath=coll_args.subcpath, user_id=coll_args.user_id,
                          q=coll_args.q, minsize=coll_args.minsize, save=coll_args.save,
                          samplesize=coll_args.samplesize)
    collocs, cache_path = cache.get(cattr=coll_args.cattr, csortfn=coll_args.csortfn,
                                    cbgrfns=coll_args.cbgrfns, cfromw=coll_args.cfromw,
                                    ctow=coll_args.ctow, cminbgr=coll_args.cminbgr,
                                    cminfreq=coll_args.cminfreq)
    if collocs is None:
        num_fetch_items = CollCalcCache.MANATEE_DEFAULT_NUM_FETCH_LINES
    else:
        num_fetch_items = len(collocs['Items'])
    if collocs is None or collend > num_fetch_items:
        if os.path.isfile(cache_path):  # cache available but without enough items
            os.unlink(cache_path)
        if collend >= num_fetch_items:
            num_fetch_items += (collend - num_fetch_items) + \
                10 * int(coll_args.citemsperpage)  # TODO heuristics :)
        coll_args.cache_path = cache_path
        coll_args.num_fetch_items = num_fetch_items
        backend, conf = settings.get_full('global', 'calc_backend')
        if backend == 'celery':
            import task
            app = task.get_celery_app(conf['conf'])
            res = app.send_task('worker.calculate_colls', args=(coll_args.to_dict(),),
                                time_limit=TASK_TIME_LIMIT)
            # the worker task caches the value AFTER the result is returned (see worker.py)
            ans = res.get()
        elif backend == 'multiprocessing':
            ans = calculate_colls_mp(coll_args)
    else:
        ans = dict(data=collocs, processing=0)
    result = dict(
        Head=ans['data']['Head'],
        attrname=coll_args.cattr,
        processing=ans['processing'],
        collstart=collstart,
        lastpage=0 if collstart + coll_args.citemsperpage < len(ans['data']['Items']) else 1,
        Items=ans['data']['Items'][collstart:collend - 1]
    )
    return result
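
# A worked example of the paging arithmetic above (the numbers are hypothetical):
#
#   collpage = 2, citemsperpage = 20, line_offset = 0
#   collstart = (2 - 1) * 20 + 0 = 20
#   collend   = 20 + 20 + 1     = 41
#
# Items[collstart:collend - 1] then yields rows 20..39 (the second page); the one
# extra fetched row only tells us whether a following page exists (cf. 'lastpage').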

def calculate_freqs(args):
    """
    Calculates a frequency distribution based on a defined concordance and
    frequency-related arguments. The function is able to cache the data via a
    background process/task. This prevents KonText from calculating (via Manatee)
    the full frequency list again and again (e.g. when a user moves from page
    to page).
    """
    cache = FreqCalcCache(corpname=args.corpname, subcname=args.subcname, user_id=args.user_id,
                          subcpath=args.subcpath, minsize=args.minsize, q=args.q,
                          fromp=args.fromp, pagesize=args.pagesize, save=args.save,
                          samplesize=args.samplesize)
    calc_result, cache_path = cache.get(fcrit=args.fcrit, flimit=args.flimit,
                                        freq_sort=args.freq_sort, ml=args.ml,
                                        ftt_include_empty=args.ftt_include_empty,
                                        rel_mode=args.rel_mode,
                                        collator_locale=args.collator_locale)
    if calc_result is None:
        backend, conf = settings.get_full('global', 'calc_backend')
        if backend == 'celery':
            import task
            args.cache_path = cache_path
            app = task.get_celery_app(conf['conf'])
            res = app.send_task('worker.calculate_freqs', args=(args.to_dict(),))
            # the worker task caches the value AFTER the result is returned (see worker.py)
            calc_result = res.get()
        elif backend == 'multiprocessing':
            calc_result = calculate_freqs_mp(args)
    data = calc_result['freqs']
    conc_size = calc_result['conc_size']
    lastpage = None
    if len(data) == 1:  # a single block => pagination
        total_length = len(data[0]['Items']) if 'Items' in data[0] else 0
        items_per_page = args.fmaxitems
        fstart = (args.fpage - 1) * args.fmaxitems + args.line_offset
        fmaxitems = args.fmaxitems * args.fpage + 1 + args.line_offset
        if total_length < fmaxitems:
            lastpage = 1
        else:
            lastpage = 0
        ans = [dict(Total=total_length,
                    TotalPages=int(math.ceil(total_length / float(items_per_page))),
                    Items=data[0]['Items'][fstart:fmaxitems - 1] if 'Items' in data[0] else [],
                    Head=data[0].get('Head', []))]
    else:
        for item in data:
            if 'Items' not in item:
                item['Items'] = []
            item['Total'] = len(item['Items'])
            item['TotalPages'] = None
        ans = data
        fstart = None
    return dict(lastpage=lastpage, data=ans, fstart=fstart, fmaxitems=args.fmaxitems,
                conc_size=conc_size)
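
# The same kind of arithmetic drives the single-block pagination above
# (hypothetical numbers):
#
#   fpage = 3, fmaxitems = 50, line_offset = 0
#   fstart    = (3 - 1) * 50 + 0 = 100
#   fmaxitems = 50 * 3 + 1 + 0   = 151   (rebound locally, one row of look-ahead)
#
# Items[fstart:fmaxitems - 1] covers rows 100..149 and lastpage flips to 1 only
# when fewer than 151 rows exist in total.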

def _cancel_async_task(cache_map, subchash, q):
    cachefile = cache_map.cache_file_path(subchash, q)
    status = cache_map.get_calc_status(subchash, q)
    backend, conf = settings.get_full('global', 'calc_backend')
    if backend == 'multiprocessing':
        logging.getLogger(__name__).warning(
            'Unable to cancel async task in multiprocessing mode')
    elif backend == 'celery' and status:
        import task
        try:
            if status.task_id:
                app = task.get_celery_app(conf['conf'])
                app.control.revoke(status.task_id, terminate=True, signal='SIGKILL')
        except IOError:
            pass
    cache_map.del_entry(subchash, q)
    _del_silent(cachefile)
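
# Note on the Celery branch above: revoke(..., terminate=True, signal='SIGKILL')
# terminates the task only if it is already executing on a worker; a still-queued
# task is merely marked as revoked. Either way, the cache entry and the cache file
# are dropped unconditionally afterwards.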

def build_arf_db(corp, attrname):
    """
    Provides a higher-level wrapper around create_arf_db(): it runs the
    calculation in a background process/task.
    """
    base_path = corp_freqs_cache_path(corp, attrname)
    if calc_is_running(base_path):
        curr_status = _get_total_calc_status(base_path)
        if curr_status < 100:
            return curr_status
    subc_path = prepare_arf_calc_paths(corp, attrname)
    backend, conf = settings.get_full('global', 'calc_backend')
    if backend == 'celery':
        import task
        app = task.get_celery_app(conf['conf'])
        task_ids = []
        for m in ('frq', 'arf', 'docf'):
            logfilename_m = create_log_path(base_path, m)
            write_log_header(corp, logfilename_m)
            res = app.send_task('worker.compile_{0}'.format(m),
                                (corp.corpname, subc_path, attrname, logfilename_m),
                                time_limit=TASK_TIME_LIMIT)
            task_ids.append(res.id)
        return task_ids
    elif backend == 'multiprocessing':
        import subprocess
        for m in ('frq', 'arf', 'docf'):
            logfilename_m = create_log_path(base_path, m)
            open(logfilename_m, 'w').write('%d\n%s\n0 %%' % (os.getpid(), corp.search_size()))
        log = " 2>> '%s'" % logfilename_m
        if subc_path:
            cmd = u"mkstats '%s' '%s' %%s '%s' %s" % (corp.get_confpath(), attrname,
                                                      subc_path.decode('utf-8'),
                                                      log.decode('utf-8'))
            cmd = cmd.encode('utf-8')
        else:
            cmd = "mkstats '%s' '%s' %%s %s" % (corp.get_confpath(), attrname, log)
        subprocess.call(cmd % 'frq', shell=True)
        return []
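
# For illustration, with hypothetical inputs (registry path '/opt/registry/mycorp',
# attrname 'word', no subcorpus) the command template above expands in two steps:
#
#   cmd         == "mkstats '/opt/registry/mycorp' 'word' %s  2>> '<logfile>'"
#   cmd % 'frq' == "mkstats '/opt/registry/mycorp' 'word' frq  2>> '<logfile>'"
#
# i.e. the doubled %% survives the first interpolation so that the statistics type
# can be substituted afterwards.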

def _create_subcorpus(self, request):
    """
    req. arguments:
    subcname -- name of the new subcorpus
    create -- bool, sets whether to create the new subcorpus
    cql -- custom within condition
    """
    subcname = request.form['subcname']
    within_json = request.form.get('within_json')
    raw_cql = request.form.get('cql')
    aligned_corpora = request.form.getlist('aligned_corpora')
    publish = bool(int(request.form.get('publish')))
    corpus_info = self.get_corpus_info(self.args.corpname)
    description = request.form.get('description')

    if raw_cql:
        aligned_corpora = []
        tt_query = ()
        within_cql = raw_cql
        full_cql = 'aword,[] %s' % raw_cql
        imp_cql = (full_cql,)
    elif within_json:  # user entered a subcorpus query manually
        aligned_corpora = []
        tt_query = ()
        within_cql = self._deserialize_custom_within(json.loads(within_json))
        full_cql = 'aword,[] %s' % within_cql
        imp_cql = (full_cql,)
    elif len(aligned_corpora) > 0 and plugins.runtime.LIVE_ATTRIBUTES.exists:
        if corpus_info.metadata.label_attr and corpus_info.metadata.id_attr:
            within_cql = None
            attrs = json.loads(request.form.get('attrs', '{}'))
            sel_match = plugins.runtime.LIVE_ATTRIBUTES.instance.get_attr_values(
                self._plugin_api, corpus=self.corp, attr_map=attrs,
                aligned_corpora=aligned_corpora, limit_lists=False)
            values = sel_match['attr_values'][corpus_info.metadata.label_attr]
            args = argmapping.Args()
            setattr(args, 'sca_{0}'.format(corpus_info.metadata.id_attr),
                    [v[1] for v in values])
            tt_query = TextTypeCollector(self.corp, args).get_query()
            tmp = ['<%s %s />' % item for item in tt_query]
            full_cql = ' within '.join(tmp)
            full_cql = 'aword,[] within %s' % full_cql
            full_cql = import_string(full_cql, from_encoding=self.corp_encoding)
            imp_cql = (full_cql,)
        else:
            raise FunctionNotSupported(
                'Corpus must have a bibliography item defined to support this function')
    else:
        within_cql = None
        tt_query = TextTypeCollector(self.corp, request).get_query()
        tmp = ['<%s %s />' % item for item in tt_query]
        full_cql = ' within '.join(tmp)
        full_cql = 'aword,[] within %s' % full_cql
        full_cql = import_string(full_cql, from_encoding=self.corp_encoding)
        imp_cql = (full_cql,)

    basecorpname = self.args.corpname.split(':')[0]
    if not subcname:
        raise UserActionException(_('No subcorpus name specified!'))
    path = self.prepare_subc_path(basecorpname, subcname, publish=False)
    publish_path = self.prepare_subc_path(
        basecorpname, subcname, publish=True) if publish else None
    if type(path) == unicode:
        path = path.encode('utf-8')

    if len(tt_query) == 1 and len(aligned_corpora) == 0:
        result = corplib.create_subcorpus(path, self.corp, tt_query[0][0], tt_query[0][1])
        if result and publish_path:
            corplib.mk_publish_links(path, publish_path, description)
    elif len(tt_query) > 1 or within_cql or len(aligned_corpora) > 0:
        backend, conf = settings.get_full('global', 'calc_backend')
        if backend == 'celery':
            import task
            app = task.get_celery_app(conf['conf'])
            res = app.send_task('worker.create_subcorpus',
                                (self.session_get('user', 'id'), self.args.corpname, path,
                                 publish_path, tt_query, imp_cql, description),
                                time_limit=TASK_TIME_LIMIT)
            self._store_async_task(AsyncTaskStatus(
                status=res.status, ident=res.id,
                category=AsyncTaskStatus.CATEGORY_SUBCORPUS,
                label=u'%s:%s' % (basecorpname, subcname),
                args=dict(subcname=subcname, corpname=basecorpname)))
            result = {}
        elif backend == 'multiprocessing':
            from bgcalc import subc_calc
            import functools
            import multiprocessing
            worker = subc_calc.CreateSubcorpusTask(user_id=self.session_get('user', 'id'),
                                                   corpus_id=self.args.corpname)
            multiprocessing.Process(target=functools.partial(
                worker.run, tt_query, imp_cql, path, publish_path, description)).start()
            result = {}
    else:
        raise UserActionException(_('Nothing specified!'))

    if result is not False:
        with plugins.runtime.SUBC_RESTORE as sr:
            try:
                sr.store_query(user_id=self.session_get('user', 'id'),
                               corpname=self.args.corpname, subcname=subcname,
                               cql=full_cql.strip().split('[]', 1)[-1])
            except Exception as e:
                logging.getLogger(__name__).warning(
                    'Failed to store subcorpus query: %s' % e)
                self.add_system_message(
                    'warning',
                    _('Subcorpus created but there was a problem saving a backup copy.'))
        unfinished_corpora = filter(lambda at: not at.is_finished(),
                                    self.get_async_tasks(
                                        category=AsyncTaskStatus.CATEGORY_SUBCORPUS))
        return dict(unfinished_subc=[uc.to_dict() for uc in unfinished_corpora])
    else:
        raise SubcorpusError(_('Empty subcorpus!'))
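
# An example of the CQL assembly performed above (the text-type selection is
# hypothetical):
#
#   tt_query = [('doc', 'genre="fiction"'), ('text', 'year="2010"')]
#   tmp      = ['<doc genre="fiction" />', '<text year="2010" />']
#   full_cql = 'aword,[] within <doc genre="fiction" /> within <text year="2010" />'
#
# subc_restore then stores everything after the '[]' marker,
# cf. full_cql.strip().split('[]', 1)[-1].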

initializer.init_plugin('user_items')
initializer.init_plugin('corparch')
initializer.init_plugin('live_attributes', optional=True)

translation.load_translations(settings.get('global', 'translations'))
translation.activate('en_US')  # background jobs do not need localization

import concworker
import task
from bgcalc import freq_calc
from bgcalc import subc_calc
from bgcalc import coll_calc

_, conf = settings.get_full('global', 'calc_backend')
app = task.get_celery_app(conf['conf'])


def load_script_module(name, path):
    return imp.load_source(name, path)


class WorkerTaskException(Exception):
    pass


def is_compiled(corp, attr, method):
    """
    Tests whether the pre-calculated data for a particular combination
    corpus+attribute+method (arf, docf, frq) already exist.