def ajax_subcorp_info(self, subcname=''):
    """
    Return basic size information about a subcorpus of the current corpus.

    arguments:
    subcname -- name of the subcorpus (default: empty string)

    returns:
    a dict with the subcorpus name and formatted full/searchable sizes
    """
    subc = self.cm.get_Corpus(self.args.corpname, subcname)
    return dict(
        subCorpusName=subcname,
        corpusSize=format_number(subc.size()),
        subCorpusSize=format_number(subc.search_size()))
def ajax_get_corp_details(self, request):
    """
    Return an overview of a corpus: localized name and description, token
    size, positional attributes and structures with their sizes, and a web URL.

    arguments:
    request -- a request object providing args['corpname']

    returns:
    a dict ready for JSON serialization; 'attrlist' is replaced by an error
    dict if attribute sizes cannot be loaded
    """
    corp_conf_info = plugins.get('corparch').get_corpus_info(request.args['corpname'])
    corpus = self.cm.get_Corpus(request.args['corpname'])
    encoding = corpus.get_conf('ENCODING')
    ans = {
        'corpname': l10n.import_string(
            self._canonical_corpname(corpus.get_conf('NAME')), from_encoding=encoding),
        'description': l10n.import_string(corpus.get_info(), from_encoding=encoding),
        'size': l10n.format_number(int(corpus.size())),
        'attrlist': [],
        'structlist': [],
        'web_url': corp_conf_info['web'] if corp_conf_info is not None else ''
    }
    try:
        ans['attrlist'] = [{'name': item,
                            'size': l10n.format_number(int(corpus.get_attr(item).id_range()))}
                           for item in corpus.get_conf('ATTRLIST').split(',')]
    except RuntimeError as e:
        # Logger.warn() is a deprecated alias of warning(); lazy %-style args
        # let the logging module defer string formatting
        logging.getLogger(__name__).warning('%s', e)
        ans['attrlist'] = {'error': _('Failed to load')}
    ans['structlist'] = [{'name': item,
                          'size': l10n.format_number(int(corpus.get_struct(item).size()))}
                         for item in corpus.get_conf('STRUCTLIST').split(',')]
    return ans
def ajax_get_corp_details(self, request):
    """
    Return an overview of a corpus: localized name and description, token
    size, attribute/structure lists, web URL and citation information.

    arguments:
    request -- a request object providing args['corpname']

    returns:
    a dict ready for JSON serialization; 'attrlist' is replaced by an error
    dict if attribute sizes cannot be loaded
    """
    corp_conf_info = self.get_corpus_info(request.args['corpname'])
    corpus = self.cm.get_Corpus(request.args['corpname'])
    citation_info = corp_conf_info.get('citation_info', None)
    citation_info = citation_info.to_dict() if citation_info else {}
    # bind the corpus encoding once so all strings are imported consistently
    import_str = partial(l10n.import_string, from_encoding=corpus.get_conf('ENCODING'))
    if corpus.get_conf('NAME'):
        corpus_name = corpus.get_conf('NAME')
    else:
        # fall back to the registry identifier when no display name is set
        corpus_name = corpus.corpname
    ans = {
        'corpname': import_str(corpus_name),
        'description': import_str(corpus.get_info()),
        'size': l10n.format_number(int(corpus.size())),
        'attrlist': [],
        'structlist': [],
        'web_url': corp_conf_info['web'] if corp_conf_info is not None else '',
        'citation_info': citation_info
    }
    try:
        ans['attrlist'] = [{'name': item,
                            'size': l10n.format_number(int(corpus.get_attr(item).id_range()))}
                           for item in corpus.get_conf('ATTRLIST').split(',')]
    except RuntimeError as e:
        # Logger.warn() is a deprecated alias of warning(); lazy %-style args
        # let the logging module defer string formatting
        logging.getLogger(__name__).warning('%s', e)
        ans['attrlist'] = {'error': _('Failed to load')}
    ans['structlist'] = [{'name': item,
                          'size': l10n.format_number(int(corpus.get_struct(item).size()))}
                         for item in corpus.get_conf('STRUCTLIST').split(',')]
    return ans
def ajax_subcorp_info(self, subcname=''):
    """
    Return size information about a subcorpus, optionally extended by data
    from the 'subc_restore' plug-in (if installed).

    arguments:
    subcname -- name of the subcorpus (default: empty string)

    returns:
    a dict with corpus/subcorpus names, formatted sizes and extended info
    """
    subc = self.cm.get_Corpus(self.args.corpname, subcname)
    ans = dict(
        corpusName=self._canonical_corpname(self.args.corpname),
        subCorpusName=subcname,
        corpusSize=format_number(subc.size()),
        subCorpusSize=format_number(subc.search_size()),
        extended_info={})
    if plugins.has_plugin('subc_restore'):
        info = plugins.get('subc_restore').get_info(
            self._session_get('user', 'id'), self.args.corpname, subcname)
        if info:
            ans['extended_info'].update(info)
    return ans
def format_data_types(data):
    """
    Format numeric values of a dict in-place for display.

    Digit-only string values are first converted to int; int/float values
    are then replaced by localized number strings. Exact type() checks are
    used on purpose (e.g. bool values are left untouched). Non-dict input
    is returned unchanged.

    arguments:
    data -- typically a dict of scalar values

    returns:
    the (possibly mutated) input object
    """
    if type(data) is dict:
        for key in data.keys():
            value = data[key]
            if type(value) is str and value.isdigit():
                value = int(value)
                data[key] = value
            if type(value) in (int, float):
                data[key] = l10n.format_number(value)
    return data
def ajax_get_corp_details(self, request):
    """
    Return an overview of a corpus: localized name and description, token
    size, attribute/structure lists, web URL and citation information.

    arguments:
    request -- a request object providing args['corpname']

    returns:
    a dict ready for JSON serialization; 'attrlist' is replaced by an error
    dict if attribute sizes cannot be loaded
    """
    corp_conf_info = plugins.get('corparch').get_corpus_info(
        request.args['corpname'])
    corpus = self.cm.get_Corpus(request.args['corpname'])
    citation_info = corp_conf_info.get('citation_info', None)
    citation_info = citation_info.to_dict() if citation_info else {}
    # bind the corpus encoding once so all strings are imported consistently
    import_str = partial(l10n.import_string, from_encoding=corpus.get_conf('ENCODING'))
    if corpus.get_conf('NAME'):
        corpus_name = corpus.get_conf('NAME')
    else:
        # fall back to the canonical registry identifier when no display name is set
        corpus_name = self._canonical_corpname(corpus.corpname)
    ans = {
        'corpname': import_str(corpus_name),
        'description': import_str(corpus.get_info()),
        'size': l10n.format_number(int(corpus.size())),
        'attrlist': [],
        'structlist': [],
        'web_url': corp_conf_info['web'] if corp_conf_info is not None else '',
        'citation_info': citation_info
    }
    try:
        ans['attrlist'] = [{
            'name': item,
            'size': l10n.format_number(int(corpus.get_attr(item).id_range()))
        } for item in corpus.get_conf('ATTRLIST').split(',')]
    except RuntimeError as e:
        # Logger.warn() is a deprecated alias of warning(); lazy %-style args
        # let the logging module defer string formatting
        logging.getLogger(__name__).warning('%s', e)
        ans['attrlist'] = {'error': _('Failed to load')}
    ans['structlist'] = [{
        'name': item,
        'size': l10n.format_number(int(corpus.get_struct(item).size()))
    } for item in corpus.get_conf('STRUCTLIST').split(',')]
    return ans
def export_with_norms(self, subcorpattrs='', format_num=True, ret_nums=True, subcnorm='tokens'):
    """
    Returns a text types table containing also an information about
    total occurrences of respective attribute values.

    See corplib.texttype_values for arguments and returned value
    """
    ans = {}
    # resolve the attribute display configuration, falling back from the
    # explicit argument to corpus registry settings
    if not subcorpattrs:
        subcorpattrs = self._corp.get_conf('SUBCORPATTRS')
        if not subcorpattrs:
            subcorpattrs = self._corp.get_conf('FULLREF')
    if not subcorpattrs or subcorpattrs == '#':
        raise TextTypesException(
            _('Missing display configuration of structural attributes (SUBCORPATTRS or FULLREF).'))
    corpus_info = plugins.get('corparch').get_corpus_info(self._corpname)
    maxlistsize = settings.get_int('global', 'max_attr_list_size')
    # if 'live_attributes' are installed then always shrink bibliographical
    # entries even if their count is < maxlistsize
    subcorp_attr_list = re.split(r'\s*[,|]\s*', subcorpattrs)
    if plugins.has_plugin('live_attributes'):
        ans['bib_attr'] = corpus_info['metadata']['label_attr']
        list_none = (ans['bib_attr'], )
        tmp = [s for s in subcorp_attr_list]  # making copy here
        if ans['bib_attr'] and ans['bib_attr'] not in tmp:  # if bib type is not in subcorpattrs
            tmp.append(ans['bib_attr'])  # we add it there
            subcorpattrs = '|'.join(tmp)  # we ignore NoSkE '|' vs. ',' stuff deliberately here
    else:
        ans['bib_attr'] = None
        list_none = ()
    tt = self._tt_cache.get_values(corp=self._corp, subcorpattrs=subcorpattrs, maxlistsize=maxlistsize,
                                   shrink_list=list_none, collator_locale=corpus_info.collator_locale)
    self._add_tt_custom_metadata(tt)
    if ret_nums:
        # one norms calculator per structure (e.g. 'doc' from 'doc.author')
        struct_calc = collections.OrderedDict()
        for item in subcorp_attr_list:
            k = item.split('.')[0]
            struct_calc[k] = CachedStructNormsCalc(self._corp, k, subcnorm, db=plugins.get('db'))
        # flatten all 'Line' entries from all text-type blocks and attach a
        # computed norm ('xcnt') to every value
        for col in reduce(lambda p, c: p + c['Line'], tt, []):
            if 'textboxlength' not in col:
                structname, attrname = col['name'].split('.')
                for val in col['Values']:
                    v = struct_calc[structname].compute_norm(attrname, val['v'])
                    val['xcnt'] = l10n.format_number(v) if format_num else v
        ans['Blocks'] = tt
        # NOTE(review): keys()[0] is Python 2-only (dict views are not
        # indexable in Python 3) — confirm target interpreter
        ans['Normslist'] = self._get_normslist(struct_calc.keys()[0])
    else:
        ans['Blocks'] = tt
        ans['Normslist'] = []
    return ans
def ajax_subcorp_info(self, subcname=''):
    """
    Return detailed information about a subcorpus (sizes, creation date,
    description, published name), optionally extended by data from the
    SUBC_RESTORE plug-in.

    arguments:
    subcname -- name of the subcorpus (default: empty string)

    returns:
    a dict ready for JSON serialization
    """
    subc = self.cm.get_Corpus(self.args.corpname, subcname=subcname)
    ans = {
        'corpusId': self.args.corpname,
        'corpusName': self._human_readable_corpname(),
        'subCorpusName': subcname,
        # published subcorpora keep their original name separately
        'origSubCorpusName': subc.orig_subcname if subc.is_published else subcname,
        'corpusSize': format_number(subc.size()),
        'subCorpusSize': format_number(subc.search_size()),
        'created': time.strftime(l10n.datetime_formatting(), subc.created.timetuple()),
        'description': subc.description,
        'extended_info': {}
    }
    if plugins.runtime.SUBC_RESTORE.exists:
        with plugins.runtime.SUBC_RESTORE as sr:
            info = sr.get_info(self.session_get('user', 'id'), self.args.corpname, subcname)
            if info:
                ans['extended_info'].update(info)
    return ans
def ajax_subcorp_info(self, subcname=''):
    """
    Return information about a subcorpus (names, sizes, creation date),
    optionally extended by data from the SUBC_RESTORE plug-in.

    arguments:
    subcname -- name of the subcorpus (default: empty string)

    returns:
    a dict ready for JSON serialization
    """
    subc = self.cm.get_Corpus(self.args.corpname, subcname)
    ans = dict(
        corpusName=self._canonical_corpname(self.args.corpname),
        subCorpusName=subcname,
        corpusSize=format_number(subc.size()),
        subCorpusSize=format_number(subc.search_size()),
        created=time.strftime(l10n.datetime_formatting(), subc.created.timetuple()),
        extended_info={})
    if plugins.runtime.SUBC_RESTORE.exists:
        with plugins.runtime.SUBC_RESTORE as sr:
            info = sr.get_info(self.session_get('user', 'id'), self.args.corpname, subcname)
            if info:
                ans['extended_info'].update(info)
    return ans
def export_with_norms(self, subcorpattrs='', format_num=True, ret_nums=True, subcnorm='tokens'):
    """
    Returns a text types table containing also an information about
    total occurrences of respective attribute values.

    See corplib.texttype_values for arguments and returned value
    """
    ans = {}
    # resolve the attribute display configuration, falling back from the
    # explicit argument to corpus registry settings
    if not subcorpattrs:
        subcorpattrs = self._corp.get_conf('SUBCORPATTRS')
        if not subcorpattrs:
            subcorpattrs = self._corp.get_conf('FULLREF')
    if not subcorpattrs or subcorpattrs == '#':
        raise TextTypesException(
            _('Missing display configuration of structural attributes (SUBCORPATTRS or FULLREF).'))
    corpus_info = plugins.get('corparch').get_corpus_info(self._corpname)
    maxlistsize = settings.get_int('global', 'max_attr_list_size')
    # if 'live_attributes' are installed then always shrink bibliographical
    # entries even if their count is < maxlistsize
    subcorp_attr_list = re.split(r'\s*[,|]\s*', subcorpattrs)
    if plugins.has_plugin('live_attributes'):
        ans['bib_attr'] = corpus_info['metadata']['label_attr']
        list_none = (ans['bib_attr'], )
        tmp = [s for s in subcorp_attr_list]  # making copy here
        if ans['bib_attr'] and ans['bib_attr'] not in tmp:  # if bib type is not in subcorpattrs
            tmp.append(ans['bib_attr'])  # we add it there
            subcorpattrs = '|'.join(tmp)  # we ignore NoSkE '|' vs. ',' stuff deliberately here
    else:
        ans['bib_attr'] = None
        list_none = ()
    tt = self._tt_cache.get_values(
        corp=self._corp, subcorpattrs=subcorpattrs, maxlistsize=maxlistsize,
        shrink_list=list_none, collator_locale=corpus_info.collator_locale)
    self._add_tt_custom_metadata(tt)
    if ret_nums:
        # one norms calculator per structure (e.g. 'doc' from 'doc.author')
        struct_calc = collections.OrderedDict()
        for item in subcorp_attr_list:
            k = item.split('.')[0]
            struct_calc[k] = CachedStructNormsCalc(self._corp, k, subcnorm, db=plugins.get('db'))
        # flatten all 'Line' entries from all text-type blocks and attach a
        # computed norm ('xcnt') to every value
        for col in reduce(lambda p, c: p + c['Line'], tt, []):
            if 'textboxlength' not in col:
                structname, attrname = col['name'].split('.')
                for val in col['Values']:
                    v = struct_calc[structname].compute_norm(attrname, val['v'])
                    val['xcnt'] = l10n.format_number(v) if format_num else v
        ans['Blocks'] = tt
        # NOTE(review): keys()[0] is Python 2-only (dict views are not
        # indexable in Python 3) — confirm target interpreter
        ans['Normslist'] = self._get_normslist(struct_calc.keys()[0])
    else:
        ans['Blocks'] = tt
        ans['Normslist'] = []
    return ans
def get_attr_values(self, corpus, attr_map, aligned_corpora=None):
    """
    Finds all the available values of remaining attributes according to the
    provided attr_map and aligned_corpora

    arguments:
    corpus -- manatee.corpus object
    attr_map -- a dictionary of attributes and values as selected by a user
    aligned_corpora -- a list/tuple of corpora names aligned to base one (the
                       'corpus' argument); None is treated as "no aligned corpora"

    returns:
    a dictionary containing matching attributes and values
    """
    corpname = vanilla_corpname(corpus.corpname)
    corpus_info = self.corparch.get_corpus_info(corpname)
    bib_label = LiveAttributes.import_key(corpus_info.metadata.label_attr)
    bib_id = LiveAttributes.import_key(corpus_info.metadata.id_attr)
    attrs = self._get_subcorp_attrs(corpus)
    if bib_label and bib_label not in attrs:
        attrs.append(bib_label)
    # attributes still to be searched = all attrs minus the ones already
    # fixed by the user (range selections, i.e. dict values, stay included)
    srch_attrs = set(attrs) - set(self.import_key(k) for k in attr_map.keys()
                                  if type(attr_map[k]) is not dict)
    srch_attrs.add('poscount')
    hidden_attrs = set()
    if bib_id is not None and bib_id not in srch_attrs:
        hidden_attrs.add(bib_id)
    if not bib_id:
        hidden_attrs.add('id')
    selected_attrs = tuple(srch_attrs.union(hidden_attrs))
    # a map [db_col_name]=>[db_col_idx]
    srch_attr_map = dict([(x[1], x[0]) for x in enumerate(selected_attrs)])
    attr_items = AttrArgs(attr_map, self.empty_val_placeholder)
    where_sql, where_values = attr_items.export_sql('t1', corpname)
    join_sql = []
    i = 2
    # BUG FIX: aligned_corpora defaults to None and was iterated directly,
    # raising TypeError when the argument is omitted; treat None as empty
    for item in (aligned_corpora or ()):
        join_sql.append('JOIN item AS t%d ON t1.item_id = t%d.item_id' % (i, i))
        where_sql += ' AND t%d.corpus_id = ?' % i
        where_values.append(item)
        i += 1
    if len(where_sql) > 0:
        sql_template = "SELECT DISTINCT %s FROM item AS t1 %s WHERE %s" \
                       % (', '.join(self.apply_prefix(selected_attrs, 't1')),
                          ' '.join(join_sql), where_sql)
    else:
        sql_template = "SELECT DISTINCT %s FROM item AS t1 %s " \
                       % (', '.join(self.apply_prefix(selected_attrs, 't1')),
                          ' '.join(join_sql))
    ans = {}
    # already selected items are part of the answer; no need to fetch them from db
    ans.update(dict([(self.import_key(k), v) for k, v in attr_map.items()]))
    range_attrs = set()
    for attr in ans.keys():
        if type(ans[attr]) is dict:
            ans[attr] = set()  # currently we throw away the range and load all the stuff
            range_attrs.add(attr)
    for attr in srch_attrs:
        if attr in ('poscount',):
            ans[attr] = 0
        else:
            ans[attr] = set()
    poscounts = defaultdict(lambda: defaultdict(lambda: 0))
    max_visible_chars = self.calc_max_attr_val_visible_chars(corpus_info)
    for item in self.db(corpname).execute(sql_template, *where_values).fetchall():
        for attr in selected_attrs:
            v = item[srch_attr_map[attr]]
            if v is not None and attr not in hidden_attrs:
                attr_val = None
                if attr == bib_label:
                    # bib entries carry (shortened label, db id, full label)
                    attr_val = (self.shorten_value(unicode(v), length=max_visible_chars),
                                item[srch_attr_map[bib_id]], unicode(v))
                elif type(ans[attr]) is set:
                    attr_val = (self.shorten_value(unicode(v), length=max_visible_chars), v, v)
                elif type(ans[attr]) is int:
                    ans[attr] += int(v)
                if attr_val is not None:
                    poscounts[attr][attr_val] += item['poscount']
    # here we append position count information to the respective items
    # NOTE(review): deleting from poscounts while iterating items() relies on
    # Python 2 returning a list copy — would break under Python 3 views
    for attr, v in poscounts.items():
        for k, c in v.items():
            ans[attr].add(k + (l10n.format_number(c),))
        del poscounts[attr]
    exported = {}
    collator_locale = corpus_info.collator_locale
    for k in ans.keys():
        if type(ans[k]) is set:
            if len(ans[k]) <= self.max_attr_list_size or k in range_attrs:
                if k == bib_label:
                    out_data = l10n.sort(ans[k], collator_locale, key=lambda t: t[0])
                else:
                    out_data = tuple(l10n.sort(ans[k], collator_locale, key=lambda t: t[0]))
                exported[self.export_key(k)] = out_data
            else:
                # too many values — report just the size
                exported[self.export_key(k)] = {'length': len(ans[k])}
        else:
            exported[self.export_key(k)] = ans[k]
    exported['poscount'] = l10n.format_number(exported['poscount'])
    exported['aligned'] = aligned_corpora
    return exported
def get_attr_values(self, corpus, attr_map, aligned_corpora=None):
    """
    Finds all the available values of remaining attributes according to the
    provided attr_map and aligned_corpora

    arguments:
    corpus -- manatee.corpus object
    attr_map -- a dictionary of attributes and values as selected by a user
    aligned_corpora -- a list/tuple of corpora names aligned to base one (the
                       'corpus' argument); None is treated as "no aligned corpora"

    returns:
    a dictionary containing matching attributes and values
    """
    corpname = vanilla_corpname(corpus.corpname)
    corpus_info = self.corparch.get_corpus_info(corpname)
    bib_label = LiveAttributes.import_key(corpus_info.metadata.label_attr)
    bib_id = LiveAttributes.import_key(corpus_info.metadata.id_attr)
    attrs = self._get_subcorp_attrs(corpus)
    if bib_label and bib_label not in attrs:
        attrs.append(bib_label)
    # attributes still to be searched = all attrs minus the ones already
    # fixed by the user (range selections, i.e. dict values, stay included)
    srch_attrs = set(attrs) - set(self.import_key(k) for k in attr_map.keys()
                                  if type(attr_map[k]) is not dict)
    srch_attrs.add('poscount')
    hidden_attrs = set()
    if bib_id is not None and bib_id not in srch_attrs:
        hidden_attrs.add(bib_id)
    if not bib_id:
        hidden_attrs.add('id')
    selected_attrs = tuple(srch_attrs.union(hidden_attrs))
    # a map [db_col_name]=>[db_col_idx]
    srch_attr_map = dict([(x[1], x[0]) for x in enumerate(selected_attrs)])
    attr_items = AttrArgs(attr_map, self.empty_val_placeholder)
    where_sql, where_values = attr_items.export_sql('t1', corpname)
    join_sql = []
    i = 2
    # BUG FIX: aligned_corpora defaults to None and was iterated directly,
    # raising TypeError when the argument is omitted; treat None as empty
    for item in (aligned_corpora or ()):
        join_sql.append('JOIN item AS t%d ON t1.item_id = t%d.item_id' % (i, i))
        where_sql += ' AND t%d.corpus_id = ?' % i
        where_values.append(item)
        i += 1
    if len(where_sql) > 0:
        sql_template = "SELECT DISTINCT %s FROM item AS t1 %s WHERE %s" \
                       % (', '.join(self.apply_prefix(selected_attrs, 't1')),
                          ' '.join(join_sql), where_sql)
    else:
        sql_template = "SELECT DISTINCT %s FROM item AS t1 %s " \
                       % (', '.join(self.apply_prefix(selected_attrs, 't1')),
                          ' '.join(join_sql))
    ans = {}
    # already selected items are part of the answer; no need to fetch them from db
    ans.update(dict([(self.import_key(k), v) for k, v in attr_map.items()]))
    range_attrs = set()
    for attr in ans.keys():
        if type(ans[attr]) is dict:
            ans[attr] = set()  # currently we throw away the range and load all the stuff
            range_attrs.add(attr)
    for attr in srch_attrs:
        if attr in ('poscount',):
            ans[attr] = 0
        else:
            ans[attr] = set()
    poscounts = defaultdict(lambda: defaultdict(lambda: 0))
    max_visible_chars = self.calc_max_attr_val_visible_chars(corpus_info)
    for item in self.db(corpname).execute(sql_template, *where_values).fetchall():
        for attr in selected_attrs:
            v = item[srch_attr_map[attr]]
            if v is not None and attr not in hidden_attrs:
                attr_val = None
                if attr == bib_label:
                    # bib entries carry (shortened label, db id, full label)
                    attr_val = (self.shorten_value(unicode(v), length=max_visible_chars),
                                item[srch_attr_map[bib_id]], unicode(v))
                elif type(ans[attr]) is set:
                    attr_val = (self.shorten_value(unicode(v), length=max_visible_chars), v, v)
                elif type(ans[attr]) is int:
                    ans[attr] += int(v)
                if attr_val is not None:
                    poscounts[attr][attr_val] += item['poscount']
    # here we append position count information to the respective items
    # NOTE(review): deleting from poscounts while iterating items() relies on
    # Python 2 returning a list copy — would break under Python 3 views
    for attr, v in poscounts.items():
        for k, c in v.items():
            ans[attr].add(k + (l10n.format_number(c),))
        del poscounts[attr]
    exported = {}
    collator_locale = corpus_info.collator_locale
    for k in ans.keys():
        if type(ans[k]) is set:
            if len(ans[k]) <= self.max_attr_list_size or k in range_attrs:
                if k == bib_label:
                    out_data = l10n.sort(ans[k], collator_locale, key=lambda t: t[0])
                else:
                    out_data = tuple(l10n.sort(ans[k], collator_locale, key=lambda t: t[0]))
                exported[self.export_key(k)] = out_data
            else:
                # too many values — report just the size
                exported[self.export_key(k)] = {'length': len(ans[k])}
        else:
            exported[self.export_key(k)] = ans[k]
    exported['poscount'] = l10n.format_number(exported['poscount'])
    exported['aligned'] = aligned_corpora
    return exported
def filter(self, val, **kw):
    """
    Render *val* as a localized number with two decimal places.

    Falsy values (0, None, '') bypass formatting and are returned via str().
    """
    if not val:
        return str(val)
    return format_number(val, mask='%01.2f')
def filter(self, val, **kw):
    """
    Render *val* as a localized number.

    Falsy values (0, None, '') bypass formatting and are returned via str().
    """
    if not val:
        return str(val)
    return format_number(val)