def article_json(id=None):
    """Return a cached JSON view of the article matching *id*.

    The lookup first tries an exact match on the ``url`` field, then falls
    back to the searchable ``id.search`` field.

    NOTE(review): if neither query yields exactly one hit the function
    implicitly returns ``None`` — confirm callers handle that.
    """
    for field in ('url', 'id.search'):
        data = karp_query('query',
                          {'q': "extended||and|%s|equals|%s" % (field, id)})
        if data['hits']['total'] == 1:
            page = jsonify(data['hits']['hits'][0]['_source'])
            return set_cache(page)
def compute_article(lang="", cache=True, url=''):
    """Render (and memcache) the A-Z article index page.

    Args:
        lang: UI language code; '' lets ``getcache`` resolve it.
        cache: when True, a previously memcached page may be returned.
        url: canonical page url passed through to the template.

    Returns:
        Rendered HTML for the article index.
    """
    set_language_switch_link("article_index", lang=lang)
    art, lang = getcache('article', lang, cache)
    if art is not None:
        return art
    show = ','.join(
        ['name', 'url', 'undertitel', 'lifespan', 'undertitel_eng'])
    infotext = helpers.get_infotext("article", request.url_rule.rule)
    # The backend keeps language-specific sort keys; pick the right one.
    if lang == 'sv':
        sort = 'sorteringsnamn.sort,sorteringsnamn.init,sorteringsnamn,tilltalsnamn.sort,tilltalsnamn'
    else:
        sort = 'sorteringsnamn.eng_sort,sorteringsnamn.eng_init,sorteringsnamn,tilltalsnamn.sort,tilltalsnamn'
    data = karp_query('minientry', {
        'q': "extended||and|namn|exists",
        'show': show,
        'sort': sort
    }, mode="skbllinks")
    art = render_template('list.html',
                          hits=data["hits"],
                          headline=gettext(u'Women A-Z'),
                          alphabetic=True,
                          split_letters=True,
                          infotext=infotext,
                          title='Articles',
                          page_url=url)
    try:
        with mc_pool.reserve() as client:
            client.set(cache_name('article', lang), art,
                       time=app.config['CACHE_TIME'])
    except Exception:
        # Cache write is best effort; never fail the request over memcached.
        # TODO: log the failure instead of ignoring it silently.
        pass
    return art
def article(id=None):
    """Render a single article page, served from the page cache when possible."""
    # The routing rule decides the UI language ('/sv/...' vs '/en/...').
    lang = "sv" if 'sv' in request.url_rule.rule else "en"
    pagename = 'article_' + id
    page = check_cache(pagename, lang=lang)
    if page is not None:
        return page
    # Look the article up by url first; fall back to the searchable id.
    data = karp_query('query', {'q': "extended||and|url|equals|%s" % (id)})
    if data['hits']['total'] == 0:
        data = karp_query('query',
                          {'q': "extended||and|id.search|equals|%s" % (id)})
    set_language_switch_link("article_index", id)
    page = show_article(data, lang)
    return set_cache(page, name=pagename, lang=lang, no_hits=1)
def compute_organisation(lang="", infotext="", cache=True, url=''):
    """Render (and memcache) the organisation index grouped by type and name.

    NOTE(review): the *infotext* parameter is immediately overwritten below;
    it is kept only for interface compatibility with the other compute_*
    helpers.

    Args:
        lang: UI language code; '' lets ``getcache`` resolve it.
        cache: when True, a previously memcached page may be returned.
        url: canonical page url passed through to the template.

    Returns:
        Rendered HTML for the organisation index.
    """
    set_language_switch_link("organisation_index", lang=lang)
    art, lang = getcache('organisation', lang, cache)
    if art is not None:
        return art
    infotext = helpers.get_infotext("organisation", request.url_rule.rule)
    # Only the shown fields and the type key differ between languages.
    if lang == "en":
        show = 'organisationsnamn,organisationstyp_eng'
        typefield = "type_eng"
    else:
        show = 'organisationsnamn,organisationstyp'
        typefield = "type"
    data = karp_query('minientry', {
        'q': 'extended||and|anything|regexp|.*',
        'show': show
    })
    # Two-level grouping: organisation type -> organisation name -> entry ids.
    nested_obj = {}
    for hit in data['hits']['hits']:
        for org in hit['_source'].get('organisation', []):
            orgtype = helpers.unescape(org.get(typefield, '-'))
            if orgtype not in nested_obj:
                nested_obj[orgtype] = defaultdict(set)
            nested_obj[orgtype][org.get('name', '-')].add(hit['_id'])
    art = render_template('nestedbucketresults.html',
                          results=nested_obj,
                          title=gettext("Organisations"),
                          infotext=infotext,
                          name='organisation',
                          page_url=url)
    try:
        with mc_pool.reserve() as client:
            client.set(cache_name('organisation', lang), art,
                       time=app.config['CACHE_TIME'])
    except Exception:
        # Cache write is best effort; TODO: log the failure.
        pass
    return art
def compute_artikelforfattare(infotext='', description='', lang="",
                              cache=True, url=''):
    """Render (and memcache) the list of article authors.

    Duplicate author names and a few known-bad backend entries are filtered
    out before rendering.

    Args:
        infotext, description: texts passed through to the template.
        lang: UI language code; '' lets ``getcache`` resolve it.
        cache: when True, a previously memcached page may be returned.
        url: canonical page url passed through to the template.

    Returns:
        Rendered HTML for the article-author index.
    """
    set_language_switch_link("articleauthor_index", lang=lang)
    art, lang = getcache('author', lang, cache)
    if art is not None:
        return art
    q_data = {
        'buckets':
        'artikel_forfattare_fornamn.bucket,artikel_forfattare_efternamn.bucket'
    }
    data = karp_query('statlist', q_data)
    # strip kw0 to get correct sorting
    stat_table = [[kw[0].strip()] + kw[1:] for kw in data['stat_table']
                  if kw[0] != ""]
    # Reorder to "Lastname, Firstname, count".
    stat_table = [[kw[1] + ',', kw[0], kw[2]] for kw in stat_table]
    # Remove duplicates and some wrong ones (because of backend limitation):
    stoplist = {
        u"Grevesmühl,Kajsa",
        u"Ohrlander,Anders",
        u"Myrberg Burström,Mats",
        u"Burström,Nanouschka",
        u"Ljung,Yvonne",
        u"Lindholm,Barbro",
    }
    seen = set()
    new_stat_table = []
    for item in stat_table:
        fullname = item[0] + item[1]
        if fullname not in seen and fullname not in stoplist:
            new_stat_table.append(item)
            seen.add(fullname)
    art = render_template('bucketresults.html',
                          results=new_stat_table,
                          alphabetical=True,
                          title=gettext('Article authors'),
                          name='articleauthor',
                          infotext=infotext,
                          description=description,
                          sortnames=True,
                          page_url=url)
    try:
        with mc_pool.reserve() as client:
            client.set(cache_name('author', lang), art,
                       time=app.config['CACHE_TIME'])
    except Exception:
        # Cache write is best effort; TODO: log the failure.
        pass
    return art
def find_link(searchstring):
    """Find an article based on an ISNI number or a person name.

    Returns a pair ``(data, target)`` where ``data`` is the raw karp
    response and ``target`` is the article url (falling back to the
    Elasticsearch id) when exactly one hit was found, else ''.
    """
    if re.search('^[0-9 ]*$', searchstring):
        # Digits and spaces only: treat the string as an ISNI/SWOID number.
        number = searchstring.replace(" ", "")
        data = karp_query(
            'query',
            {'q': "extended||and|swoid.search|equals|%s" % (number)})
    else:
        parts = searchstring.split(" ")
        if "," in searchstring or len(parts) == 1:
            # When there is only a first name (a queen or so)
            # case 1: "Margareta"
            # case 2: "Margareta, drottning"
            firstname = parts[0] if len(parts) == 1 else searchstring
            data = karp_query('query', {
                'q': "extended||and|fornamn.search|contains|%s" % (firstname)
            })
        else:
            fornamn = " ".join(parts[:-1])
            prefix = ""
            # Nobiliary particles belong with the surname, not the first name.
            last_fornamn = fornamn.split(" ")[-1]
            if last_fornamn in ("von", "af"):
                fornamn = " ".join(fornamn.split(" ")[:-1])
                prefix = last_fornamn + " "
            efternamn = prefix + parts[-1]
            data = karp_query(
                'query', {
                    'q':
                    "extended||and|fornamn.search|contains|%s||and|efternamn.search|contains|%s"
                    % (fornamn, efternamn)
                })
    # The expected case: only one hit is found.
    if data['hits']['total'] == 1:
        hit = data['hits']['hits'][0]
        return data, (hit['_source'].get('url') or hit['_id'])
    # Otherwise just return the data.
    return data, ''
def compute_place(lang="", cache=True, url=''):
    """Render (and memcache) the place-name index page.

    Args:
        lang: UI language code; '' lets ``getcache`` resolve it.
        cache: when True, a previously memcached page may be returned.
        url: canonical page url passed through to the template.

    Returns:
        Rendered HTML for the place index.
    """
    set_language_switch_link("place_index", lang=lang)
    art, lang = getcache('place', lang, cache)
    if art is not None:
        return art
    infotext = helpers.get_infotext("place", request.url_rule.rule)

    def parse(kw):
        # May be used to parse names with or without coordinates:
        # "Lysekil" or "Lysekil|58.275573|11.435558"
        place = kw.get('key')
        if '|' in place:
            name, lat, lon = place.split('|')
        else:
            name = place.strip()
            lat, lon = 0, 0
        placename = name if name else '%s, %s' % (lat, lon)
        return {
            'name': placename,
            'lat': lat,
            'lon': lon,
            'count': kw.get('doc_count')
        }

    def has_name(kw):
        # Keep only entries with a real name; drop "(uncertain)" markers.
        name = kw.get('key').split('|')[0]
        if name and u"(osäker uppgift)" not in name:
            return name
        return None

    # To use the coordinates, use 'getplaces' instead of 'getplacenames'
    data = karp_query('getplacenames', {})
    stat_table = [parse(kw) for kw in data['places'] if has_name(kw)]
    art = render_template('places.html',
                          places=stat_table,
                          title=gettext("Placenames"),
                          infotext=infotext,
                          description=helpers.get_shorttext(infotext),
                          page_url=url)
    try:
        with mc_pool.reserve() as client:
            client.set(cache_name('place', lang), art,
                       time=app.config['CACHE_TIME'])
    except Exception:
        # Cache write is best effort; TODO: log the failure.
        pass
    return art
def search():
    """Run a free-text search and render the hit list (cached per query).

    The query string is handled as UTF-8 bytes throughout (Python 2); it is
    decoded only where the template or the external karp URL needs unicode.
    """
    set_language_switch_link("search")
    search = request.args.get('q', '').encode('utf-8')
    pagename = 'search' + urllib.quote(search)
    page = check_cache(pagename)
    if page is not None:
        return page
    advanced_search_text = ''
    if not search:
        # Empty query: render an empty hit list with a zero-width space
        # placeholder so the template has something to show.
        data = {"hits": {"total": 0, "hits": []}}
        karp_url = ""
        search = u'\u200b'.encode('utf8')
    else:
        show = ','.join(
            ['name', 'url', 'undertitel', 'undertitel_eng', 'lifespan'])
        karp_q = {
            'highlight': True,
            'size': app.config['SEARCH_RESULT_SIZE'],
            'show': show
        }
        if '*' in search:
            # Translate shell-style wildcards into a regexp query
            # (a '*' already preceded by '.' is left untouched).
            search = re.sub('(?<!\.)\*', '.*', search)
            karp_q['q'] = "extended||and|anything|regexp|%s" % search
        else:
            karp_q['q'] = "extended||and|anything|contains|%s" % search
        data = karp_query('minientry', karp_q, mode='skbl')
        with app.open_resource("static/pages/advanced-search/%s.html" %
                               (g.language)) as f:
            advanced_search_text = f.read()
        karp_url = "https://spraakbanken.gu.se/karp/#?mode=skbl&advanced=false&hpp=25&extended=and%7Cnamn%7Cequals%7C&searchTab=simple&page=1&search=simple%7C%7C" + search.decode(
            "utf-8")
    t = render_template(
        'list.html',
        headline="",
        subheadline=gettext('Hits for "%s"') % search.decode("UTF-8"),
        hits_name=data["hits"],
        hits=data["hits"],
        advanced_search_text=advanced_search_text.decode("UTF-8"),
        search=search.decode("UTF-8"),
        alphabetic=True,
        karp_url=karp_url,
        more=data["hits"]["total"] > app.config["SEARCH_RESULT_SIZE"],
        show_lang_switch=False)
    return set_cache(t, name=pagename, no_hits=data["hits"]["total"])
def bucketcall(queryfield='', name='', title='', sortby='', lastnamefirst=False,
               infotext='', description='', query='', alphabetical=False,
               insert_entries=None, page_url=''):
    """Fetch a bucketed statistics list from karp and render it.

    Args:
        queryfield: backend field whose '.bucket' aggregation is requested.
        sortby: optional key function for sorting the rows.
        lastnamefirst: reorder rows to "Lastname, Firstname, count".
        insert_entries: extra reference rows appended before sorting.
    """
    q_data = {'buckets': '%s.bucket' % queryfield}
    if query:
        q_data['q'] = query
    data = karp_query('statlist', q_data)
    # Strip kw0 to get correct sorting
    stat_table = [[kw[0].strip()] + kw[1:] for kw in data['stat_table']
                  if kw[0] != ""]
    # Insert entries that function as references
    if insert_entries:
        stat_table.extend(insert_entries)
    # sort(key=None) is the plain sort, so one call covers both cases.
    stat_table.sort(key=sortby or None)
    if lastnamefirst:
        stat_table = [[kw[1] + ',', kw[0], kw[2]] for kw in stat_table]
    return render_template('bucketresults.html',
                           results=stat_table,
                           alphabetical=alphabetical,
                           title=gettext(title),
                           name=name,
                           infotext=infotext,
                           description=description,
                           page_url=page_url)
def place(place=None):
    """Render the list of articles connected to one place name (cached)."""
    pagename = urllib.quote('place_' + place.encode('utf8'))
    cached = check_cache(pagename)
    if cached is not None:
        return cached
    # Optional map coordinates supplied by the caller's link.
    lat = request.args.get('lat')
    lon = request.args.get('lon')
    set_language_switch_link("place_index", place)
    hits = karp_query('query', {
        'q':
        "extended||and|plats.searchraw|equals|%s" % (place.encode('utf-8'))
    })
    no_hits = hits['hits']['total']
    if no_hits > 0:
        page = render_template('placelist.html',
                               title=place,
                               lat=lat,
                               lon=lon,
                               headline=place,
                               hits=hits["hits"])
    else:
        page = render_template(
            'page.html', content=gettext('Contents could not be found!'))
    return set_cache(page, name=pagename, no_hits=no_hits)
def searchresult(result, name='', searchfield='', imagefolder='', query='',
                 searchtype='equals', title='', authorinfo=False, lang='',
                 show_lang_switch=True, cache=True):
    """Render a hit list for one value of one search field (e.g. a keyword).

    Args:
        result: the value searched for (unicode; encoded to UTF-8 internally).
        name: page family name, used for cache keys and the language switch.
        searchfield, searchtype: karp extended-query field and operator; only
            used when *query* is empty.
        imagefolder: static image folder checked for '<result>.jpg'.
        query: full karp query string overriding searchfield/searchtype.
        title: page title; defaults to *result*.

    Returns:
        Rendered HTML.  Any exception is caught and rendered as a debug page
        showing the error and the attempted query (deliberate error contract).
    """
    set_language_switch_link("%s_index" % name, result)
    try:
        result = result.encode("UTF-8")
        pagename = name + '_' + urllib.quote(result)
        art = check_cache(pagename, lang)
        if art is not None:
            return art
        show = ','.join(
            ['name', 'url', 'undertitel', 'lifespan', 'undertitel_eng'])
        if query:
            hits = karp_query('minientry', {'q': query, 'show': show})
        else:
            hits = karp_query(
                'minientry', {
                    'q': "extended||and|%s.search|%s|%s" %
                    (searchfield, searchtype, result),
                    'show': show
                })
        title = title or result.decode("UTF-8")
        no_hits = hits['hits']['total']
        if no_hits > 0:
            # Attach a portrait image if one exists for this entry.
            picture = None
            if os.path.exists(app.config.root_path +
                              '/static/images/%s/%s.jpg' %
                              (imagefolder, result)):
                picture = '/static/images/%s/%s.jpg' % (imagefolder, result)
            page = render_template('list.html',
                                   picture=picture,
                                   alphabetic=True,
                                   title=title,
                                   headline=title,
                                   hits=hits["hits"],
                                   authorinfo=authorinfo,
                                   show_lang_switch=show_lang_switch)
            # Only pages above the hit threshold are worth caching.
            if no_hits >= app.config['CACHE_HIT_LIMIT']:
                try:
                    with mc_pool.reserve() as client:
                        client.set(cache_name(pagename, lang), page,
                                   time=app.config['CACHE_TIME'])
                except Exception:
                    # Cache write is best effort; TODO: log the failure.
                    pass
            return page
        else:
            return render_template(
                'page.html', content=gettext('Contents could not be found!'))
    except Exception as e:
        # Deliberate catch-all: show the failing query for debugging rather
        # than a server error page.
        return render_template(
            'page.html',
            content="%s\n%s: extended||and|%s.search|%s|%s" %
            (e, app.config['KARP_BACKEND'], searchfield, searchtype, result))