Ejemplo n.º 1
0
    def _load_query_history(self, offset, limit, from_date, to_date,
                            query_type, current_corpus):
        if plugins.has_plugin('query_storage'):
            from query_history import Export

            if current_corpus:
                corpname = self.args.corpname
            else:
                corpname = None

            exporter = Export(corpus_manager=self.cm,
                              corpname_canonizer=self._canonical_corpname,
                              url_creator=self.create_url)
            rows = plugins.get('query_storage').get_user_queries(
                self._session_get('user', 'id'),
                offset=offset,
                limit=limit,
                query_type=query_type,
                corpname=corpname,
                from_date=from_date,
                to_date=to_date)
            rows = filter(lambda x: x is not None,
                          [exporter.export_row(row) for row in rows])
        else:
            rows = ()
        return rows
Ejemplo n.º 2
0
def get_lang(environ):
    """
    Detects user's preferred language (either via the 'getlang' plugin or from HTTP_ACCEPT_LANGUAGE env value)

    arguments:
    environ -- WSGI environment variable

    returns:
    underscore-separated ISO 639 language code and ISO 3166 country code
    """
    installed = dict([
        (x.split('_')[0], x)
        for x in os.listdir('%s/../locale' % os.path.dirname(__file__))
    ])

    if plugins.has_plugin('getlang'):
        lgs_string = plugins.get('getlang').fetch_current_language(
            KonTextCookie(environ.get('HTTP_COOKIE', '')))
    else:
        lgs_string = parse_accept_header(
            environ.get('HTTP_ACCEPT_LANGUAGE')).best
        if len(lgs_string
               ) == 2:  # in case we obtain just an ISO 639 language code
            lgs_string = installed.get(lgs_string)
        else:
            lgs_string = lgs_string.replace('-', '_')
    if lgs_string is None:
        lgs_string = 'en_US'
    return lgs_string
Ejemplo n.º 3
0
    def _create_subcorpus(self, request):
        """
        req. arguments:
        subcname -- name of new subcorpus
        create -- bool, sets whether to create new subcorpus
        cql -- custom within condition
        """
        subcname = request.form['subcname']
        within_json = request.form.get('within_json')
        raw_cql = request.form.get('cql')
        corp_encoding = self._corp().get_conf('ENCODING')

        if raw_cql:
            tt_query = ()
            within_cql = raw_cql
            full_cql = 'aword,[] %s' % raw_cql
            imp_cql = (full_cql,)
        elif within_json:  # user entered a subcorpus query manually
            tt_query = ()
            within_cql = self._deserialize_custom_within(json.loads(within_json))
            full_cql = 'aword,[] %s' % within_cql
            imp_cql = (full_cql,)
        else:
            tt_query = TextTypeCollector(self._corp(), request).get_query()
            full_cql = ' within '.join(['<%s %s />' % item for item in tt_query])
            full_cql = 'aword,[] within %s' % full_cql
            full_cql = import_string(full_cql, from_encoding=corp_encoding)
            imp_cql = (full_cql,)
        basecorpname = self.args.corpname.split(':')[0]
        if not subcname:
            raise UserActionException(_('No subcorpus name specified!'))
        path = self.prepare_subc_path(basecorpname, subcname)

        if type(path) == unicode:
            path = path.encode('utf-8')

        if len(tt_query) == 1:
            result = corplib.create_subcorpus(path, self._corp(), tt_query[0][0], tt_query[0][1])
        elif len(tt_query) > 1 or within_cql:
            conc = conclib.get_conc(self._corp(), self._session_get('user', 'user'), q=imp_cql)
            conc.sync()
            struct = self._corp().get_struct(tt_query[0][0]) if len(tt_query) == 1 else None
            result = corplib.subcorpus_from_conc(path, conc, struct)
        else:
            raise UserActionException(_('Nothing specified!'))

        if result:
            if plugins.has_plugin('subc_restore'):
                try:
                    plugins.get('subc_restore').store_query(user_id=self._session_get('user', 'id'),
                                                            corpname=self.args.corpname,
                                                            subcname=subcname,
                                                            cql=full_cql.split('[]')[-1])
                except Exception as e:
                    logging.getLogger(__name__).warning('Failed to store subcorpus query: %s' % e)
                    self.add_system_message('warning',
                                            _('Subcorpus created but there was a problem saving a backup copy.'))
            return {}
        else:
            raise ConcError(_('Empty subcorpus!'))
Ejemplo n.º 4
0
    def _create_subcorpus(self, request):
        """
        req. arguments:
        subcname -- name of new subcorpus
        create -- bool, sets whether to create new subcorpus
        within_condition -- custom within condition; if non-empty then clickable form is omitted
        within_struct -- a structure the within_condition will be applied to
        """
        subcname = request.form['subcname']
        within_condition = request.form['within_condition']
        within_struct = request.form['within_struct']
        corp_encoding = self._corp().get_conf('ENCODING')

        if within_condition and within_struct:  # user entered a subcorpus query manually
            tt_query = [(export_string(within_struct, to_encoding=corp_encoding),
                        export_string(within_condition, to_encoding=corp_encoding))]
        else:
            tt_query = self._texttype_query(request)
            within_struct = import_string(tt_query[0][0], from_encoding=corp_encoding)
            within_condition = import_string(tt_query[0][1], from_encoding=corp_encoding)

        basecorpname = self.args.corpname.split(':')[0]
        if not subcname:
            raise ConcError(_('No subcorpus name specified!'))

        path = os.path.join(self.subcpath[-1], basecorpname)
        if not os.path.isdir(path):
            os.makedirs(path)
        path = os.path.join(path, subcname) + '.subc'
        if not tt_query:
            raise ConcError(_('Nothing specified!'))

        # Even if _texttype_query() parsed multiple structures into tt_query,
        # Manatee can accept directly only one (but with arbitrarily complex attribute
        # condition).
        # For this reason, we choose only the first struct+condition pair.
        # It is up to the user interface to deal with it.
        structname, subquery = tt_query[0]
        if type(path) == unicode:
            path = path.encode("utf-8")
        if corplib.create_subcorpus(path, self._corp(), structname, subquery):
            if plugins.has_plugin('subc_restore'):
                try:
                    plugins.get('subc_restore').store_query(user_id=self._session_get('user', 'id'),
                                                            corpname=self.args.corpname,
                                                            subcname=subcname,
                                                            structname=within_struct,
                                                            condition=within_condition)
                except Exception as e:
                    logging.getLogger(__name__).warning('Failed to store subcorpus query: %s' % e)
                    self.add_system_message('warning',
                                            _('Subcorpus created but there was a problem saving a backup copy.'))
            return {}
        else:
            raise ConcError(_('Empty subcorpus!'))
Ejemplo n.º 5
0
 def ajax_wipe_subcorpus(self, request):
     if plugins.has_plugin('subc_restore'):
         corpus_id = request.form['corpname']
         subcorp_name = request.form['subcname']
         plugins.get('subc_restore').delete_query(self._session_get('user', 'id'),
                                                  corpus_id, subcorp_name)
         self.add_system_message('info',
                                 _('Subcorpus %s has been deleted permanently.') % subcorp_name)
     else:
         self.add_system_message('error', _('Unsupported operation (plug-in not present)'))
     return {}
Ejemplo n.º 6
0
 def ajax_wipe_subcorpus(self, request):
     if plugins.has_plugin('subc_restore'):
         corpus_id = request.form['corpname']
         subcorp_name = request.form['subcname']
         plugins.get('subc_restore').delete_query(self._session_get('user', 'id'),
                                                  corpus_id, subcorp_name)
         self.add_system_message('info',
                                 _('Subcorpus %s has been deleted permanently.') % subcorp_name)
     else:
         self.add_system_message('error', _('Unsupported operation (plug-in not present)'))
     return {}
Ejemplo n.º 7
0
    def export_with_norms(self, subcorpattrs='', format_num=True, ret_nums=True, subcnorm='tokens'):
        """
        Returns a text types table containing also an information about
        total occurrences of respective attribute values.

        See corplib.texttype_values for arguments and returned value
        """
        ans = {}
        if not subcorpattrs:
            subcorpattrs = self._corp.get_conf('SUBCORPATTRS')
            if not subcorpattrs:
                subcorpattrs = self._corp.get_conf('FULLREF')
        if not subcorpattrs or subcorpattrs == '#':
            raise TextTypesException(
                _('Missing display configuration of structural attributes (SUBCORPATTRS or FULLREF).'))

        corpus_info = plugins.get('corparch').get_corpus_info(self._corpname)
        maxlistsize = settings.get_int('global', 'max_attr_list_size')
        # if 'live_attributes' are installed then always shrink bibliographical
        # entries even if their count is < maxlistsize
        subcorp_attr_list = re.split(r'\s*[,|]\s*', subcorpattrs)

        if plugins.has_plugin('live_attributes'):
            ans['bib_attr'] = corpus_info['metadata']['label_attr']
            list_none = (ans['bib_attr'], )
            tmp = [s for s in subcorp_attr_list]  # making copy here
            if ans['bib_attr'] and ans['bib_attr'] not in tmp:  # if bib type is not in subcorpattrs
                tmp.append(ans['bib_attr'])                     # we add it there
                subcorpattrs = '|'.join(tmp)  # we ignore NoSkE '|' vs. ',' stuff deliberately here
        else:
            ans['bib_attr'] = None
            list_none = ()

        tt = self._tt_cache.get_values(corp=self._corp, subcorpattrs=subcorpattrs, maxlistsize=maxlistsize,
                                       shrink_list=list_none, collator_locale=corpus_info.collator_locale)
        self._add_tt_custom_metadata(tt)

        if ret_nums:
            struct_calc = collections.OrderedDict()
            for item in subcorp_attr_list:
                k = item.split('.')[0]
                struct_calc[k] = CachedStructNormsCalc(self._corp, k, subcnorm, db=plugins.get('db'))
            for col in reduce(lambda p, c: p + c['Line'], tt, []):
                if 'textboxlength' not in col:
                    structname, attrname = col['name'].split('.')
                    for val in col['Values']:
                        v = struct_calc[structname].compute_norm(attrname, val['v'])
                        val['xcnt'] = l10n.format_number(v) if format_num else v
            ans['Blocks'] = tt
            ans['Normslist'] = self._get_normslist(struct_calc.keys()[0])
        else:
            ans['Blocks'] = tt
            ans['Normslist'] = []
        return ans
Ejemplo n.º 8
0
    def load_topbar(self):
        if plugins.has_plugin('application_bar'):
            import urlparse

            html = plugins.get('application_bar').get_contents(self.cookies, 'en', '/')
            parts = urlparse.urlparse(self.environ['REQUEST_URI'])
            css = '<link rel="stylesheet" type="text/css" href="%s://www.korpus.cz/toolbar/css/cnc-toolbar.css" />' \
                % parts.scheme
        else:
            html = ''
            css = ''
        return html, css
Ejemplo n.º 9
0
    def load_topbar(self):
        if plugins.has_plugin('application_bar'):
            import urlparse

            html = plugins.get('application_bar').get_contents(
                self.cookies, 'en', '/')
            parts = urlparse.urlparse(self.environ['REQUEST_URI'])
            css = '<link rel="stylesheet" type="text/css" href="%s://www.korpus.cz/toolbar/css/cnc-toolbar.css" />' \
                % parts.scheme
        else:
            html = ''
            css = ''
        return html, css
Ejemplo n.º 10
0
 def ajax_subcorp_info(self, subcname=''):
     sc = self.cm.get_Corpus(self.args.corpname, subcname)
     ans = {
         'corpusName': self._canonical_corpname(self.args.corpname),
         'subCorpusName': subcname,
         'corpusSize': format_number(sc.size()),
         'subCorpusSize': format_number(sc.search_size()),
         'extended_info': {}
     }
     if plugins.has_plugin('subc_restore'):
         tmp = plugins.get('subc_restore').get_info(self._session_get('user', 'id'),
                                                    self.args.corpname, subcname)
         if tmp:
             ans['extended_info'].update(tmp)
     return ans
Ejemplo n.º 11
0
    def _load_query_history(self, offset, limit, from_date, to_date, query_type, current_corpus):
        if plugins.has_plugin('query_storage'):
            from query_history import Export

            if current_corpus:
                corpname = self.args.corpname
            else:
                corpname = None

            exporter = Export(corpus_manager=self.cm, corpname_canonizer=self._canonical_corpname,
                              url_creator=self.create_url)
            rows = plugins.get('query_storage').get_user_queries(
                self._session_get('user', 'id'),
                offset=offset, limit=limit,
                query_type=query_type, corpname=corpname,
                from_date=from_date, to_date=to_date)
            rows = [exporter.export_row(row) for row in rows]
        else:
            rows = ()
        return rows
Ejemplo n.º 12
0
    def subcorp_form(self, request, conc_args):
        """
        Displays a form to create a new subcorpus
        """
        self.disabled_menu_items = self.CONCORDANCE_ACTIONS
        method = request.form.get('method', 'gui')
        within_json = request.form.get('within_json', 'null')
        subcname = request.form.get('subcname', None)
        subcnorm = request.args.get('subcnorm', 'tokens')

        try:
            tt_sel = get_tt(self._corp(), self.ui_lang).export_with_norms(subcnorm=subcnorm)
        except UserActionException as e:
            tt_sel = {'Normslist': [], 'Blocks': []}
            self.add_system_message('warning', e)
        structs_and_attrs = {}
        for s, a in [t.split('.') for t in self._corp().get_conf('STRUCTATTRLIST').split(',')]:
            if s not in structs_and_attrs:
                structs_and_attrs[s] = []
            structs_and_attrs[s].append(a)

        out = {'SubcorpList': ()}
        if self.environ['REQUEST_METHOD'] == 'POST':
            self._store_checked_text_types(request.form, out)

        if plugins.has_plugin('subcmixer'):
            out['subcmixer_form_data'] = plugins.get('subcmixer').form_data(self._plugin_api)
        else:
            out['subcmixer_form_data'] = {}
        out.update({
            'TextTypeSel': tt_sel,
            'structs_and_attrs': structs_and_attrs,
            'method': method,
            'within_json': within_json,
            'subcname': subcname,
            'subcnorm': subcnorm
        })
        return out
Ejemplo n.º 13
0
def get_lang(environ):
    """
    Detects user's preferred language (either via the 'getlang' plugin or from HTTP_ACCEPT_LANGUAGE env value)

    arguments:
    environ -- WSGI environment variable

    returns:
    underscore-separated ISO 639 language code and ISO 3166 country code
    """
    installed = dict([(x.split('_')[0], x) for x in os.listdir('%s/../locale' % os.path.dirname(__file__))])

    if plugins.has_plugin('getlang'):
        lgs_string = plugins.get('getlang').fetch_current_language(KonTextCookie(
            environ.get('HTTP_COOKIE', '')))
    else:
        lgs_string = parse_accept_header(environ.get('HTTP_ACCEPT_LANGUAGE')).best
        if len(lgs_string) == 2:  # in case we obtain just an ISO 639 language code
            lgs_string = installed.get(lgs_string)
        else:
            lgs_string = lgs_string.replace('-', '_')
    if lgs_string is None:
        lgs_string = 'en_US'
    return lgs_string
Ejemplo n.º 14
0
    def subcorp_list(self, request):
        """
        Displays a list of user subcorpora. In case there is a 'subc_restore' plug-in
        installed then the list is enriched by additional re-use/undelete information.
        """
        self.disabled_menu_items = (MainMenu.VIEW, MainMenu.FILTER, MainMenu.FREQUENCY,
                                    MainMenu.COLLOCATIONS, MainMenu.SAVE, MainMenu.CONCORDANCE)

        sort = 'n'  # TODO
        show_deleted = int(request.args.get('show_deleted', 0))
        current_corp = self.args.corpname
        if self.get_http_method() == 'POST':
            selected_subc = request.form.getlist('selected_subc')
            self._delete_subcorpora(selected_subc)

        data = []
        for corp in plugins.get('auth').permitted_corpora(self._session_get('user', 'id')).values():
            try:
                self.cm.get_Corpus(corp)
                basecorpname = corp.split(':')[0]
                for item in self.cm.subcorp_names(basecorpname):
                    sc = self.cm.get_Corpus(corp, item['n'])
                    subc_id = '%s:%s' % (corp, item['n'])
                    data.append({
                        'n': subc_id,
                        'v': item['n'],
                        'size': sc.search_size(),
                        'created': sc.created,
                        'corpname': corp,
                        'usesubcorp': item['n'],
                        'deleted': False
                    })
            except Exception as e:
                for d in data:
                    # permitted_corpora does this
                    d['usesubcorp'] = werkzeug.urls.url_quote(d['usesubcorp'], unsafe='+')
                logging.getLogger(__name__).warn(
                    'Failed to fetch information about subcorpus of [%s]: %s' % (corp, e))

        if plugins.has_plugin('subc_restore'):
            try:
                full_list = plugins.get('subc_restore').extend_subc_list(
                    data, self._session_get('user', 'id'),
                    bool(show_deleted), 0)
            except Exception as e:
                logging.getLogger(__name__).error('subc_restore plug-in failed to list queries: %s' % e)
                full_list = []
        else:
            full_list = data

        # TODO sorting does not work
        sort_key, rev = Kontext._parse_sorting_param(sort)
        if sort_key in ('size', 'created'):
            data = sorted(data, key=lambda x: x[sort_key], reverse=rev)
        else:
            data = l10n.sort(data, loc=self.ui_lang, key=lambda x: x[sort_key], reverse=rev)

        sort_keys = dict([(x, (x, '')) for x in ('n', 'size', 'created')])
        if not rev:
            sort_keys[sort_key] = ('-%s' % sort_key, '&#8593;')
        else:
            sort_keys[sort_key] = (sort_key, '&#8595;')

        # this is necessary to reset manatee module back to its original state
        self.cm.get_Corpus(current_corp)

        ans = {
            'subcorp_list': full_list,
            'sort_keys': sort_keys,
            'show_deleted': show_deleted,
            'rev': rev
        }
        self._export_subcorpora_list(ans)
        return ans
Ejemplo n.º 15
0
    def subcorp_list(self, request):
        """
        Displays a list of user subcorpora. In case there is a 'subc_restore' plug-in
        installed then the list is enriched by additional re-use/undelete information.
        """
        self.disabled_menu_items = (MainMenu.VIEW, MainMenu.FILTER, MainMenu.FREQUENCY,
                                    MainMenu.COLLOCATIONS, MainMenu.SAVE, MainMenu.CONCORDANCE)

        sort = 'n'  # TODO
        show_deleted = int(request.args.get('show_deleted', 0))
        if self.get_http_method() == 'POST':
            selected_subc = request.form.getlist('selected_subc')
            self._delete_subcorpora(selected_subc)

        data = []
        user_corpora = plugins.get('auth').permitted_corpora(self._session_get('user', 'id')).values()
        for corp in user_corpora:
            try:
                for item in self.cm.subcorp_names(corp):
                    sc = self.cm.get_Corpus(corp, item['n'])
                    data.append({
                        'n': '%s:%s' % (self._canonical_corpname(corp), item['n']),
                        'internal_n': '%s:%s' % (corp, item['n']),
                        'v': item['n'],
                        'size': sc.search_size(),
                        'created': sc.created,
                        'corpname': corp,
                        'human_corpname': sc.get_conf('NAME'),
                        'usesubcorp': item['n'],
                        'deleted': False
                    })
            except Exception as e:
                for d in data:
                    # permitted_corpora does this
                    d['usesubcorp'] = werkzeug.urls.url_quote(d['usesubcorp'], unsafe='+')
                logging.getLogger(__name__).warn(
                    'Failed to fetch information about subcorpus of [%s]: %s' % (corp, e))

        if plugins.has_plugin('subc_restore'):
            try:
                full_list = plugins.get('subc_restore').extend_subc_list(
                    data, self._session_get('user', 'id'),
                    self._canonical_corpname,
                    bool(show_deleted), 0)
            except Exception as e:
                logging.getLogger(__name__).error('subc_restore plug-in failed to list queries: %s' % e)
                full_list = []
        else:
            full_list = data

        # TODO sorting does not work
        sort_key, rev = Kontext._parse_sorting_param(sort)
        if sort_key in ('size', 'created'):
            data = sorted(data, key=lambda x: x[sort_key], reverse=rev)
        else:
            data = l10n.sort(data, loc=self.ui_lang, key=lambda x: x[sort_key], reverse=rev)

        sort_keys = dict([(x, (x, '')) for x in ('n', 'size', 'created')])
        if not rev:
            sort_keys[sort_key] = ('-%s' % sort_key, '&#8593;')
        else:
            sort_keys[sort_key] = (sort_key, '&#8595;')

        ans = {
            'SubcorpList': [],   # this is used by subcorpus SELECT element; no need for that here
            'subcorp_list': full_list,
            'sort_keys': sort_keys,
            'show_deleted': show_deleted,
            'rev': rev
        }
        return ans
Ejemplo n.º 16
0
    def export_with_norms(self,
                          subcorpattrs='',
                          format_num=True,
                          ret_nums=True,
                          subcnorm='tokens'):
        """
        Returns a text types table containing also an information about
        total occurrences of respective attribute values.

        See corplib.texttype_values for arguments and returned value
        """
        ans = {}
        if not subcorpattrs:
            subcorpattrs = self._corp.get_conf('SUBCORPATTRS')
            if not subcorpattrs:
                subcorpattrs = self._corp.get_conf('FULLREF')
        if not subcorpattrs or subcorpattrs == '#':
            raise TextTypesException(
                _('Missing display configuration of structural attributes (SUBCORPATTRS or FULLREF).'
                  ))

        corpus_info = plugins.get('corparch').get_corpus_info(self._corpname)
        maxlistsize = settings.get_int('global', 'max_attr_list_size')
        # if 'live_attributes' are installed then always shrink bibliographical
        # entries even if their count is < maxlistsize
        subcorp_attr_list = re.split(r'\s*[,|]\s*', subcorpattrs)

        if plugins.has_plugin('live_attributes'):
            ans['bib_attr'] = corpus_info['metadata']['label_attr']
            list_none = (ans['bib_attr'], )
            tmp = [s for s in subcorp_attr_list]  # making copy here
            if ans['bib_attr'] and ans[
                    'bib_attr'] not in tmp:  # if bib type is not in subcorpattrs
                tmp.append(ans['bib_attr'])  # we add it there
                subcorpattrs = '|'.join(
                    tmp)  # we ignore NoSkE '|' vs. ',' stuff deliberately here
        else:
            ans['bib_attr'] = None
            list_none = ()

        tt = self._tt_cache.get_values(
            corp=self._corp,
            subcorpattrs=subcorpattrs,
            maxlistsize=maxlistsize,
            shrink_list=list_none,
            collator_locale=corpus_info.collator_locale)
        self._add_tt_custom_metadata(tt)

        if ret_nums:
            struct_calc = collections.OrderedDict()
            for item in subcorp_attr_list:
                k = item.split('.')[0]
                struct_calc[k] = CachedStructNormsCalc(self._corp,
                                                       k,
                                                       subcnorm,
                                                       db=plugins.get('db'))
            for col in reduce(lambda p, c: p + c['Line'], tt, []):
                if 'textboxlength' not in col:
                    structname, attrname = col['name'].split('.')
                    for val in col['Values']:
                        v = struct_calc[structname].compute_norm(
                            attrname, val['v'])
                        val['xcnt'] = l10n.format_number(
                            v) if format_num else v
            ans['Blocks'] = tt
            ans['Normslist'] = self._get_normslist(struct_calc.keys()[0])
        else:
            ans['Blocks'] = tt
            ans['Normslist'] = []
        return ans