def _load_query_history(self, offset, limit, from_date, to_date, query_type, current_corpus):
    """
    Loads user's stored queries via the 'query_storage' plug-in and converts
    them using query_history.Export.

    arguments:
    offset -- passed as is to the plug-in's get_user_queries()
    limit -- passed as is to the plug-in's get_user_queries()
    from_date -- passed as is to the plug-in's get_user_queries()
    to_date -- passed as is to the plug-in's get_user_queries()
    query_type -- passed as is to the plug-in's get_user_queries()
    current_corpus -- if true then only the current corpus (self.args.corpname)
                      is requested, otherwise no corpus restriction is passed

    returns:
    a list of exported rows (rows exported as None are left out) or an empty
    tuple in case the plug-in is not installed
    """
    if not plugins.has_plugin('query_storage'):
        return ()

    from query_history import Export

    corpname = self.args.corpname if current_corpus else None
    exporter = Export(corpus_manager=self.cm,
                      corpname_canonizer=self._canonical_corpname,
                      url_creator=self.create_url)
    stored = plugins.get('query_storage').get_user_queries(
        self._session_get('user', 'id'),
        offset=offset,
        limit=limit,
        query_type=query_type,
        corpname=corpname,
        from_date=from_date,
        to_date=to_date)
    # all rows are exported first, the unexportable ones (None) are dropped afterwards
    exported = [exporter.export_row(item) for item in stored]
    return [item for item in exported if item is not None]
def get_lang(environ):
    """
    Detects user's preferred language (either via the 'getlang' plugin or from
    HTTP_ACCEPT_LANGUAGE env value)

    arguments:
    environ -- WSGI environment variable

    returns:
    underscore-separated ISO 639 language code and ISO 3166 country code;
    'en_US' is returned if no language can be detected or if a two-letter
    code does not match any installed locale
    """
    locale_dir = '%s/../locale' % os.path.dirname(__file__)
    # maps a bare language code to an installed locale (e.g. 'en' -> 'en_US')
    installed = dict([(x.split('_')[0], x) for x in os.listdir(locale_dir)])

    if plugins.has_plugin('getlang'):
        lgs_string = plugins.get('getlang').fetch_current_language(
            KonTextCookie(environ.get('HTTP_COOKIE', '')))
    else:
        lgs_string = parse_accept_header(environ.get('HTTP_ACCEPT_LANGUAGE')).best

    if not lgs_string:
        # nothing detected (e.g. a request without the Accept-Language header);
        # without this guard the len() call below fails on None
        return 'en_US'

    if len(lgs_string) == 2:  # in case we obtain just an ISO 639 language code
        lgs_string = installed.get(lgs_string)
    else:
        lgs_string = lgs_string.replace('-', '_')
    if lgs_string is None:
        lgs_string = 'en_US'
    return lgs_string
def _create_subcorpus(self, request):
    """
    Creates a new subcorpus of the current corpus. The subcorpus is defined
    either by a raw CQL 'within' expression, by a JSON-encoded 'within'
    definition or by text types collected from the request.

    req. arguments:
    subcname -- name of new subcorpus
    cql -- custom within condition (takes precedence if non-empty)
    within_json -- JSON-encoded custom within condition (used if 'cql' is empty)

    returns:
    an empty dict

    raises:
    UserActionException -- if no subcorpus name or no condition is specified
    ConcError -- if the subcorpus creation reports an empty result
    """
    subcname = request.form['subcname']
    within_json = request.form.get('within_json')
    raw_cql = request.form.get('cql')
    corp_encoding = self._corp().get_conf('ENCODING')
    # must be bound on every path; it is tested below even if the text types
    # branch (which does not produce any custom 'within' expression) is taken
    within_cql = None

    if raw_cql:
        tt_query = ()
        within_cql = raw_cql
        full_cql = 'aword,[] %s' % raw_cql
        imp_cql = (full_cql,)
    elif within_json:  # user entered a subcorpus query manually
        tt_query = ()
        within_cql = self._deserialize_custom_within(json.loads(within_json))
        full_cql = 'aword,[] %s' % within_cql
        imp_cql = (full_cql,)
    else:
        tt_query = TextTypeCollector(self._corp(), request).get_query()
        full_cql = ' within '.join(['<%s %s />' % item for item in tt_query])
        full_cql = 'aword,[] within %s' % full_cql
        full_cql = import_string(full_cql, from_encoding=corp_encoding)
        imp_cql = (full_cql,)

    basecorpname = self.args.corpname.split(':')[0]
    if not subcname:
        raise UserActionException(_('No subcorpus name specified!'))
    path = self.prepare_subc_path(basecorpname, subcname)
    if isinstance(path, unicode):
        path = path.encode('utf-8')

    if len(tt_query) == 1:
        # a single structure + condition pair can be passed directly
        result = corplib.create_subcorpus(path, self._corp(), tt_query[0][0], tt_query[0][1])
    elif len(tt_query) > 1 or within_cql:
        conc = conclib.get_conc(self._corp(), self._session_get('user', 'user'), q=imp_cql)
        conc.sync()
        # the single-structure case is handled by the branch above, i.e. there
        # is never a single structure to pass here
        struct = None
        result = corplib.subcorpus_from_conc(path, conc, struct)
    else:
        raise UserActionException(_('Nothing specified!'))

    if result:
        if plugins.has_plugin('subc_restore'):
            try:
                plugins.get('subc_restore').store_query(user_id=self._session_get('user', 'id'),
                                                        corpname=self.args.corpname,
                                                        subcname=subcname,
                                                        cql=full_cql.split('[]')[-1])
            except Exception as e:
                # the backup is a best-effort operation; the subcorpus itself already exists
                logging.getLogger(__name__).warning('Failed to store subcorpus query: %s' % e)
                self.add_system_message('warning',
                                        _('Subcorpus created but there was a problem saving a backup copy.'))
        return {}
    else:
        raise ConcError(_('Empty subcorpus!'))
def _create_subcorpus(self, request):
    """
    Creates a new subcorpus of the current corpus based either on a manually
    entered structure + condition pair or on text types selected in the form.

    req. arguments:
    subcname -- name of new subcorpus
    within_condition -- custom within condition; if non-empty then clickable form is omitted
    within_struct -- a structure the within_condition will be applied to

    returns:
    an empty dict

    raises:
    ConcError -- if no subcorpus name or no condition is specified or if
                 the subcorpus creation reports an empty result
    """
    subcname = request.form['subcname']
    within_condition = request.form['within_condition']
    within_struct = request.form['within_struct']
    corp_encoding = self._corp().get_conf('ENCODING')
    manual_query = bool(within_condition and within_struct)

    if manual_query:  # user entered a subcorpus query manually
        tt_query = [(export_string(within_struct, to_encoding=corp_encoding),
                     export_string(within_condition, to_encoding=corp_encoding))]
    else:
        tt_query = self._texttype_query(request)

    basecorpname = self.args.corpname.split(':')[0]
    if not subcname:
        raise ConcError(_('No subcorpus name specified!'))
    # the check must precede any tt_query[0] access (and any directory creation)
    if not tt_query:
        raise ConcError(_('Nothing specified!'))

    # Even if _texttype_query() parsed multiple structures into tt_query,
    # Manatee can accept directly only one (but with arbitrarily complex attribute
    # condition).
    # For this reason, we choose only the first struct+condition pair.
    # It is up to the user interface to deal with it.
    structname, subquery = tt_query[0]
    if not manual_query:
        # values stored as a backup copy are derived from the actually used pair
        within_struct = import_string(structname, from_encoding=corp_encoding)
        within_condition = import_string(subquery, from_encoding=corp_encoding)

    path = os.path.join(self.subcpath[-1], basecorpname)
    if not os.path.isdir(path):
        os.makedirs(path)
    path = os.path.join(path, subcname) + '.subc'
    if isinstance(path, unicode):
        path = path.encode("utf-8")

    if corplib.create_subcorpus(path, self._corp(), structname, subquery):
        if plugins.has_plugin('subc_restore'):
            try:
                plugins.get('subc_restore').store_query(user_id=self._session_get('user', 'id'),
                                                        corpname=self.args.corpname,
                                                        subcname=subcname,
                                                        structname=within_struct,
                                                        condition=within_condition)
            except Exception as e:
                # the backup is a best-effort operation; the subcorpus itself already exists
                logging.getLogger(__name__).warning('Failed to store subcorpus query: %s' % e)
                self.add_system_message('warning',
                                        _('Subcorpus created but there was a problem saving a backup copy.'))
        return {}
    else:
        raise ConcError(_('Empty subcorpus!'))
def ajax_wipe_subcorpus(self, request):
    """
    Removes a stored subcorpus query via the 'subc_restore' plug-in and
    reports the outcome as a system message.

    req. arguments:
    corpname -- an identifier of the corpus the subcorpus belongs to
    subcname -- a name of the subcorpus

    returns:
    an empty dict (in all cases)
    """
    if not plugins.has_plugin('subc_restore'):
        self.add_system_message('error', _('Unsupported operation (plug-in not present)'))
        return {}

    corpus_id = request.form['corpname']
    subcorp_name = request.form['subcname']
    storage = plugins.get('subc_restore')
    storage.delete_query(self._session_get('user', 'id'), corpus_id, subcorp_name)
    message = _('Subcorpus %s has been deleted permanently.') % subcorp_name
    self.add_system_message('info', message)
    return {}
def export_with_norms(self, subcorpattrs='', format_num=True, ret_nums=True, subcnorm='tokens'):
    """
    Returns a text types table containing also an information about
    total occurrences of respective attribute values.

    See corplib.texttype_values for arguments and returned value

    arguments:
    subcorpattrs -- attributes to be exported (separated by ',' or '|'); if empty then
                    corpus configuration is used (SUBCORPATTRS, then FULLREF)
    format_num -- if True then computed norms are passed through l10n.format_number
    ret_nums -- if True then a norm is attached (key 'xcnt') to each listed value
    subcnorm -- passed as is to CachedStructNormsCalc

    returns:
    a dict with keys 'bib_attr', 'Blocks' and 'Normslist'
    """
    attrs_conf = subcorpattrs
    if not attrs_conf:
        attrs_conf = self._corp.get_conf('SUBCORPATTRS') or self._corp.get_conf('FULLREF')
    if not attrs_conf or attrs_conf == '#':
        raise TextTypesException(
            _('Missing display configuration of structural attributes (SUBCORPATTRS or FULLREF).'))

    corpus_info = plugins.get('corparch').get_corpus_info(self._corpname)
    maxlistsize = settings.get_int('global', 'max_attr_list_size')
    subcorp_attr_list = re.split(r'\s*[,|]\s*', attrs_conf)

    # if 'live_attributes' are installed then always shrink bibliographical
    # entries even if their count is < maxlistsize
    ans = {'bib_attr': None}
    shrink_list = ()
    if plugins.has_plugin('live_attributes'):
        bib_attr = corpus_info['metadata']['label_attr']
        ans['bib_attr'] = bib_attr
        shrink_list = (bib_attr, )
        if bib_attr and bib_attr not in subcorp_attr_list:
            # the bib. attribute is added only to the queried attributes (the list
            # used for norms below stays untouched);
            # we ignore NoSkE '|' vs. ',' stuff deliberately here
            attrs_conf = '|'.join(subcorp_attr_list + [bib_attr])

    tt = self._tt_cache.get_values(corp=self._corp,
                                   subcorpattrs=attrs_conf,
                                   maxlistsize=maxlistsize,
                                   shrink_list=shrink_list,
                                   collator_locale=corpus_info.collator_locale)
    self._add_tt_custom_metadata(tt)
    ans['Blocks'] = tt

    if not ret_nums:
        ans['Normslist'] = []
        return ans

    # one calculator per structure, in the order of the configured attributes
    struct_calc = collections.OrderedDict()
    for attr in subcorp_attr_list:
        struct = attr.split('.')[0]
        struct_calc[struct] = CachedStructNormsCalc(self._corp, struct, subcnorm,
                                                    db=plugins.get('db'))

    columns = [col for block in tt for col in block['Line']]
    for col in columns:
        if 'textboxlength' in col:
            continue  # such a column is skipped (no norms are computed for it)
        structname, attrname = col['name'].split('.')
        for val in col['Values']:
            norm = struct_calc[structname].compute_norm(attrname, val['v'])
            val['xcnt'] = l10n.format_number(norm) if format_num else norm

    # norms list is derived from the first configured structure
    ans['Normslist'] = self._get_normslist(next(iter(struct_calc)))
    return ans
def load_topbar(self):
    """
    Loads contents of the application toolbar (if the 'application_bar'
    plug-in is installed) along with a matching stylesheet link.

    returns:
    a 2-tuple (html, css); both values are empty strings if the plug-in
    is not installed
    """
    if not plugins.has_plugin('application_bar'):
        return '', ''

    import urlparse

    html = plugins.get('application_bar').get_contents(self.cookies, 'en', '/')
    # REQUEST_URI is a non-standard variable which typically contains just
    # a path and a query string (i.e. no scheme); in such case we use the
    # scheme provided by the WSGI server to prevent an invalid '://...' URL
    scheme = urlparse.urlparse(self.environ.get('REQUEST_URI', '')).scheme
    if not scheme:
        scheme = self.environ.get('wsgi.url_scheme', 'http')
    css = '<link rel="stylesheet" type="text/css" href="%s://www.korpus.cz/toolbar/css/cnc-toolbar.css" />' \
        % scheme
    return html, css
def load_topbar(self):
    """
    Loads contents of the application toolbar (if the 'application_bar'
    plug-in is installed) along with a matching stylesheet link.

    returns:
    a 2-tuple (html, css); both values are empty strings if the plug-in
    is not installed
    """
    if plugins.has_plugin('application_bar'):
        import urlparse

        html = plugins.get('application_bar').get_contents(
            self.cookies, 'en', '/')
        parts = urlparse.urlparse(self.environ.get('REQUEST_URI', ''))
        # REQUEST_URI is non-standard and usually holds just a path + query
        # (no scheme); fall back to the scheme reported by the WSGI server
        # so the generated link never starts with a bare '://'
        scheme = parts.scheme or self.environ.get('wsgi.url_scheme', 'http')
        css = '<link rel="stylesheet" type="text/css" href="%s://www.korpus.cz/toolbar/css/cnc-toolbar.css" />' \
            % scheme
    else:
        html = ''
        css = ''
    return html, css
def ajax_subcorp_info(self, subcname=''):
    """
    Returns basic information about a subcorpus of the current corpus.

    arguments:
    subcname -- a name of the subcorpus

    returns:
    a dict with keys 'corpusName', 'subCorpusName', 'corpusSize',
    'subCorpusSize' and 'extended_info'; the last one is filled in only if
    the 'subc_restore' plug-in is installed and provides some data
    """
    subcorp = self.cm.get_Corpus(self.args.corpname, subcname)
    extended_info = {}
    ans = {
        'corpusName': self._canonical_corpname(self.args.corpname),
        'subCorpusName': subcname,
        'corpusSize': format_number(subcorp.size()),
        'subCorpusSize': format_number(subcorp.search_size()),
        'extended_info': extended_info
    }
    if not plugins.has_plugin('subc_restore'):
        return ans

    stored_info = plugins.get('subc_restore').get_info(
        self._session_get('user', 'id'), self.args.corpname, subcname)
    if stored_info:
        extended_info.update(stored_info)
    return ans
def _load_query_history(self, offset, limit, from_date, to_date, query_type, current_corpus):
    """
    Loads user's stored queries via the 'query_storage' plug-in and converts
    them using query_history.Export.

    arguments:
    offset -- passed as is to the plug-in's get_user_queries()
    limit -- passed as is to the plug-in's get_user_queries()
    from_date -- passed as is to the plug-in's get_user_queries()
    to_date -- passed as is to the plug-in's get_user_queries()
    query_type -- passed as is to the plug-in's get_user_queries()
    current_corpus -- if true then only the current corpus (self.args.corpname)
                      is requested, otherwise no corpus restriction is passed

    returns:
    a list of exported rows or an empty tuple in case the plug-in
    is not installed
    """
    if not plugins.has_plugin('query_storage'):
        return ()

    from query_history import Export

    corpname = self.args.corpname if current_corpus else None
    exporter = Export(corpus_manager=self.cm,
                      corpname_canonizer=self._canonical_corpname,
                      url_creator=self.create_url)
    stored = plugins.get('query_storage').get_user_queries(
        self._session_get('user', 'id'),
        offset=offset,
        limit=limit,
        query_type=query_type,
        corpname=corpname,
        from_date=from_date,
        to_date=to_date)
    exported = []
    for item in stored:
        exported.append(exporter.export_row(item))
    return exported
def subcorp_form(self, request, conc_args):
    """
    Displays a form to create a new subcorpus

    arguments:
    request -- a request object (form values 'method', 'within_json', 'subcname'
               and URL argument 'subcnorm' are read)
    conc_args -- not used by this method

    returns:
    a dict of values for the form (text types, structures and their
    attributes, sub-corpus mixer data and the submitted values)
    """
    self.disabled_menu_items = self.CONCORDANCE_ACTIONS

    method = request.form.get('method', 'gui')
    within_json = request.form.get('within_json', 'null')
    subcname = request.form.get('subcname', None)
    subcnorm = request.args.get('subcnorm', 'tokens')

    try:
        tt_sel = get_tt(self._corp(), self.ui_lang).export_with_norms(subcnorm=subcnorm)
    except UserActionException as e:
        # the form is still displayed, just without text types
        tt_sel = {'Normslist': [], 'Blocks': []}
        self.add_system_message('warning', e)

    structs_and_attrs = {}
    for item in self._corp().get_conf('STRUCTATTRLIST').split(','):
        # an empty configuration value produces a single '' item which
        # cannot be split into a structure and an attribute
        if '.' not in item:
            continue
        s, a = item.split('.')
        if s not in structs_and_attrs:
            structs_and_attrs[s] = []
        structs_and_attrs[s].append(a)

    out = {'SubcorpList': ()}
    if self.environ['REQUEST_METHOD'] == 'POST':
        self._store_checked_text_types(request.form, out)

    if plugins.has_plugin('subcmixer'):
        out['subcmixer_form_data'] = plugins.get('subcmixer').form_data(self._plugin_api)
    else:
        out['subcmixer_form_data'] = {}
    out.update({
        'TextTypeSel': tt_sel,
        'structs_and_attrs': structs_and_attrs,
        'method': method,
        'within_json': within_json,
        'subcname': subcname,
        'subcnorm': subcnorm
    })
    return out
def get_lang(environ):
    """
    Detects user's preferred language (either via the 'getlang' plugin or from
    HTTP_ACCEPT_LANGUAGE env value)

    arguments:
    environ -- WSGI environment variable

    returns:
    underscore-separated ISO 639 language code and ISO 3166 country code;
    'en_US' is returned if no language can be detected or if a two-letter
    code does not match any installed locale
    """
    # maps a bare language code to an installed locale (e.g. 'en' -> 'en_US')
    installed = dict([(x.split('_')[0], x)
                      for x in os.listdir('%s/../locale' % os.path.dirname(__file__))])

    if plugins.has_plugin('getlang'):
        lgs_string = plugins.get('getlang').fetch_current_language(
            KonTextCookie(environ.get('HTTP_COOKIE', '')))
    else:
        lgs_string = parse_accept_header(environ.get('HTTP_ACCEPT_LANGUAGE')).best

    if lgs_string:
        if len(lgs_string) == 2:  # in case we obtain just an ISO 639 language code
            lgs_string = installed.get(lgs_string)
        else:
            lgs_string = lgs_string.replace('-', '_')
    # covers both "nothing detected" (e.g. a missing Accept-Language header,
    # which previously caused len(None) to fail) and an unknown two-letter code
    if not lgs_string:
        lgs_string = 'en_US'
    return lgs_string
def subcorp_list(self, request):
    """
    Displays a list of user subcorpora. In case there is a 'subc_restore' plug-in
    installed then the list is enriched by additional re-use/undelete information.

    arguments:
    request -- a request object; URL argument 'show_deleted' is read and, in case
               of the POST method, also form values 'selected_subc' (subcorpora
               to be deleted)

    returns:
    a dict with keys 'subcorp_list', 'sort_keys', 'show_deleted' and 'rev'
    (further processed by self._export_subcorpora_list)
    """
    self.disabled_menu_items = (MainMenu.VIEW, MainMenu.FILTER, MainMenu.FREQUENCY,
                                MainMenu.COLLOCATIONS, MainMenu.SAVE, MainMenu.CONCORDANCE)

    sort = 'n'  # TODO
    show_deleted = int(request.args.get('show_deleted', 0))
    # stored so the corpus manager can be switched back at the end (see below)
    current_corp = self.args.corpname
    if self.get_http_method() == 'POST':
        selected_subc = request.form.getlist('selected_subc')
        self._delete_subcorpora(selected_subc)

    data = []
    for corp in plugins.get('auth').permitted_corpora(self._session_get('user', 'id')).values():
        try:
            self.cm.get_Corpus(corp)
            basecorpname = corp.split(':')[0]
            for item in self.cm.subcorp_names(basecorpname):
                sc = self.cm.get_Corpus(corp, item['n'])
                subc_id = '%s:%s' % (corp, item['n'])
                data.append({
                    'n': subc_id,
                    'v': item['n'],
                    'size': sc.search_size(),
                    'created': sc.created,
                    'corpname': corp,
                    'usesubcorp': item['n'],
                    'deleted': False
                })
        except Exception as e:
            # a failure of a single corpus does not stop the listing; the problem is logged
            # NOTE(review): this re-quotes 'usesubcorp' of ALL the items collected so far
            # each time a corpus fails - confirm this is intended
            for d in data:
                # permitted_corpora does this
                d['usesubcorp'] = werkzeug.urls.url_quote(d['usesubcorp'], unsafe='+')
            logging.getLogger(__name__).warn(
                'Failed to fetch information about subcorpus of [%s]: %s' % (corp, e))

    if plugins.has_plugin('subc_restore'):
        try:
            full_list = plugins.get('subc_restore').extend_subc_list(
                data, self._session_get('user', 'id'), bool(show_deleted), 0)
        except Exception as e:
            logging.getLogger(__name__).error('subc_restore plug-in failed to list queries: %s' % e)
            full_list = []
    else:
        full_list = data

    # TODO sorting does not work
    # (the sorted result is stored in 'data' while 'full_list' is what gets returned)
    sort_key, rev = Kontext._parse_sorting_param(sort)
    if sort_key in ('size', 'created'):
        data = sorted(data, key=lambda x: x[sort_key], reverse=rev)
    else:
        data = l10n.sort(data, loc=self.ui_lang, key=lambda x: x[sort_key], reverse=rev)

    # each sortable column maps to a (sorting parameter value, direction mark) pair
    sort_keys = dict([(x, (x, '')) for x in ('n', 'size', 'created')])
    if not rev:
        sort_keys[sort_key] = ('-%s' % sort_key, '↑')
    else:
        sort_keys[sort_key] = (sort_key, '↓')

    # this is necessary to reset manatee module back to its original state
    self.cm.get_Corpus(current_corp)

    ans = {
        'subcorp_list': full_list,
        'sort_keys': sort_keys,
        'show_deleted': show_deleted,
        'rev': rev
    }
    self._export_subcorpora_list(ans)
    return ans
def subcorp_list(self, request):
    """
    Displays a list of user subcorpora. In case there is a 'subc_restore' plug-in
    installed then the list is enriched by additional re-use/undelete information.

    arguments:
    request -- a request object; URL argument 'show_deleted' is read and, in case
               of the POST method, also form values 'selected_subc' (subcorpora
               to be deleted)

    returns:
    a dict with keys 'SubcorpList', 'subcorp_list', 'sort_keys', 'show_deleted'
    and 'rev'
    """
    self.disabled_menu_items = (MainMenu.VIEW, MainMenu.FILTER, MainMenu.FREQUENCY,
                                MainMenu.COLLOCATIONS, MainMenu.SAVE, MainMenu.CONCORDANCE)

    sort = 'n'  # TODO
    show_deleted = int(request.args.get('show_deleted', 0))
    if self.get_http_method() == 'POST':
        self._delete_subcorpora(request.form.getlist('selected_subc'))

    subc_items = []
    user_corpora = plugins.get('auth').permitted_corpora(self._session_get('user', 'id')).values()
    for corp in user_corpora:
        try:
            for item in self.cm.subcorp_names(corp):
                subc_name = item['n']
                subcorp = self.cm.get_Corpus(corp, subc_name)
                subc_items.append({
                    'n': '%s:%s' % (self._canonical_corpname(corp), subc_name),
                    'internal_n': '%s:%s' % (corp, subc_name),
                    'v': subc_name,
                    'size': subcorp.search_size(),
                    'created': subcorp.created,
                    'corpname': corp,
                    'human_corpname': subcorp.get_conf('NAME'),
                    'usesubcorp': subc_name,
                    'deleted': False
                })
        except Exception as e:
            # a failure of a single corpus does not stop the listing
            for collected in subc_items:
                # permitted_corpora does this
                collected['usesubcorp'] = werkzeug.urls.url_quote(collected['usesubcorp'],
                                                                  unsafe='+')
            logging.getLogger(__name__).warn(
                'Failed to fetch information about subcorpus of [%s]: %s' % (corp, e))

    if not plugins.has_plugin('subc_restore'):
        full_list = subc_items
    else:
        try:
            full_list = plugins.get('subc_restore').extend_subc_list(
                subc_items, self._session_get('user', 'id'), self._canonical_corpname,
                bool(show_deleted), 0)
        except Exception as e:
            logging.getLogger(__name__).error('subc_restore plug-in failed to list queries: %s' % e)
            full_list = []

    # TODO sorting does not work
    sort_key, rev = Kontext._parse_sorting_param(sort)

    def by_sort_key(x):
        return x[sort_key]

    if sort_key in ('size', 'created'):
        subc_items = sorted(subc_items, key=by_sort_key, reverse=rev)
    else:
        subc_items = l10n.sort(subc_items, loc=self.ui_lang, key=by_sort_key, reverse=rev)

    # each sortable column maps to a (sorting parameter value, direction mark) pair
    sort_keys = {}
    for col in ('n', 'size', 'created'):
        sort_keys[col] = (col, '')
    if rev:
        sort_keys[sort_key] = (sort_key, '↓')
    else:
        sort_keys[sort_key] = ('-%s' % sort_key, '↑')

    return {
        'SubcorpList': [],  # this is used by subcorpus SELECT element; no need for that here
        'subcorp_list': full_list,
        'sort_keys': sort_keys,
        'show_deleted': show_deleted,
        'rev': rev
    }
def export_with_norms(self, subcorpattrs='', format_num=True, ret_nums=True, subcnorm='tokens'):
    """
    Returns a text types table containing also an information about
    total occurrences of respective attribute values.

    See corplib.texttype_values for arguments and returned value

    arguments:
    subcorpattrs -- attributes to be exported (separated by ',' or '|'); if empty then
                    corpus configuration is used (SUBCORPATTRS, then FULLREF)
    format_num -- if True then computed norms are passed through l10n.format_number
    ret_nums -- if True then a norm is attached (key 'xcnt') to each listed value
    subcnorm -- passed as is to CachedStructNormsCalc

    returns:
    a dict with keys 'bib_attr', 'Blocks' and 'Normslist'
    """
    attrs_conf = subcorpattrs
    if not attrs_conf:
        attrs_conf = self._corp.get_conf('SUBCORPATTRS') or self._corp.get_conf('FULLREF')
    if not attrs_conf or attrs_conf == '#':
        raise TextTypesException(
            _('Missing display configuration of structural attributes (SUBCORPATTRS or FULLREF).'))

    corpus_info = plugins.get('corparch').get_corpus_info(self._corpname)
    maxlistsize = settings.get_int('global', 'max_attr_list_size')
    subcorp_attr_list = re.split(r'\s*[,|]\s*', attrs_conf)

    # if 'live_attributes' are installed then always shrink bibliographical
    # entries even if their count is < maxlistsize
    ans = {'bib_attr': None}
    shrink_list = ()
    if plugins.has_plugin('live_attributes'):
        bib_attr = corpus_info['metadata']['label_attr']
        ans['bib_attr'] = bib_attr
        shrink_list = (bib_attr, )
        if bib_attr and bib_attr not in subcorp_attr_list:
            # the bib. attribute is added only to the queried attributes (the list
            # used for norms below stays untouched);
            # we ignore NoSkE '|' vs. ',' stuff deliberately here
            attrs_conf = '|'.join(subcorp_attr_list + [bib_attr])

    tt = self._tt_cache.get_values(corp=self._corp,
                                   subcorpattrs=attrs_conf,
                                   maxlistsize=maxlistsize,
                                   shrink_list=shrink_list,
                                   collator_locale=corpus_info.collator_locale)
    self._add_tt_custom_metadata(tt)
    ans['Blocks'] = tt

    if not ret_nums:
        ans['Normslist'] = []
        return ans

    # one calculator per structure, in the order of the configured attributes
    struct_calc = collections.OrderedDict()
    for attr in subcorp_attr_list:
        struct = attr.split('.')[0]
        struct_calc[struct] = CachedStructNormsCalc(self._corp, struct, subcnorm,
                                                    db=plugins.get('db'))

    columns = [col for block in tt for col in block['Line']]
    for col in columns:
        if 'textboxlength' in col:
            continue  # such a column is skipped (no norms are computed for it)
        structname, attrname = col['name'].split('.')
        for val in col['Values']:
            norm = struct_calc[structname].compute_norm(attrname, val['v'])
            val['xcnt'] = l10n.format_number(norm) if format_num else norm

    # norms list is derived from the first configured structure
    ans['Normslist'] = self._get_normslist(next(iter(struct_calc)))
    return ans