def format_element(bfo, separator='; ', nbOnly='no', searchlink='no'):
    """
    Prints the records (or number of records) citing this record.

    DO NOT USE > testing, not on cdsweb

    @param nbOnly: only print the number of citing records
    @param searchlink: if nbOnly, print the number as a link to the search
                       that finds these items
    @param separator: a separator between citations
    """
    from urllib import quote
    from invenio.config import CFG_SITE_URL
    from invenio.search_engine import search_pattern

    primary_report_numbers = bfo.fields('037__a')
    additional_report_numbers = bfo.fields('088__a')
    report_numbers = primary_report_numbers
    report_numbers.extend(additional_report_numbers)
    report_numbers = [quote(rep_num) for rep_num in report_numbers]

    res = []
    for rep_num in report_numbers:
        res.extend(list(search_pattern(p=rep_num, f='999C5r')))

    if nbOnly.lower() == 'yes':
        if searchlink.lower() == 'yes':
            return '<a href="' + CFG_SITE_URL + '/search?p=recid:' + \
                   bfo.control_field('001') + '&rm=citation">' + \
                   str(len(res)) + '</a>'
        else:
            return str(len(res))
    else:
        from invenio.bibformat import format_records
        return '<br/>'.join(format_records(res, 'hs'))
def _get_coauthors_fallback(personid, collabs):
    # Python 2.4 does not support max() with a key argument.
    # Please remove this function when Python 2.6 is supported.
    def max_key(iterable, key):
        try:
            ret = iterable[0]
        except IndexError:
            return None
        for i in iterable[1:]:
            if key(i) > key(ret):
                ret = i
        return ret

    if collabs:
        query = 'exactauthor:"%s" and (%s)' % (personid,
                ' or '.join([('collaboration:"%s"' % x) for x in zip(*collabs)[0]]))
        exclude_recs = perform_request_search(rg=0, p=query)
    else:
        exclude_recs = []
    recids = perform_request_search(rg=0, p='exactauthor:"%s"' % str(personid))
    recids = list(set(recids) - set(exclude_recs))
    a = format_records(recids, 'WAPAFF')
    a = [pickle.loads(p) for p in a.split('!---THEDELIMITER---!') if p]
    coauthors = {}
    for rec, affs in a:
        keys = affs.keys()
        for n in keys:
            try:
                coauthors[n].add(rec)
            except KeyError:
                coauthors[n] = set([rec])
    coauthors = [(x, x, len(coauthors[x])) for x in coauthors
                 if x.lower() != personid.lower()]
    return coauthors
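# A small stand-alone illustration (hypothetical data, not part of the
# original module) of the zip(*collabs)[0] idiom used above: collabs is
# assumed to be a list of (name, ...) tuples, and zip(*collabs)[0] picks the
# first element of every tuple, i.e. the collaboration names (Python 2 zip
# returns a list of tuples).
collabs_example = [('ATLAS', 12), ('CMS', 7)]
names = zip(*collabs_example)[0]           # ('ATLAS', 'CMS')
query_clause = ' or '.join('collaboration:"%s"' % x for x in names)
assert query_clause == 'collaboration:"ATLAS" or collaboration:"CMS"'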
def _get_pubs_per_year_fallback(person_id):
    '''
    Returns a dict consisting of: year -> number of publications in that
    year (given a personID).
    @param person_id: int personid
    @return [{'year': no_of_publications}, bool]
    '''
    recids = perform_request_search(rg=0, p='exactauthor:"%s"' % str(person_id))
    a = format_records(recids, 'WAPDAT')
    a = [deserialize(p) for p in a.strip().split('!---THEDELIMITER---!') if p]
    return _get_pubs_per_year_dictionary(a)
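# A self-contained sketch (pickle standing in for the deserialize() helper;
# assumption: the WAPDAT/WAPAFF output formats emit one serialized payload per
# record, joined by '!---THEDELIMITER---!') of the split-and-load round trip
# used above:
import pickle

DELIM = '!---THEDELIMITER---!'
blob = DELIM.join([pickle.dumps({'year': 2001}), pickle.dumps({'year': 2003})])
records = [pickle.loads(p) for p in blob.strip().split(DELIM) if p]
assert records == [{'year': 2001}, {'year': 2003}]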
def format_element(bfo, display_recent_too='no', nb_max='10'):
    """
    Returns a list of similar movies. If there are none, and
    display_recent_too == 'yes', returns the most recent movies.

    @param display_recent_too: if 'yes' and there is no similar movie,
                               display the most recent movies
    @param nb_max: maximum number of movies to display
    """
    out = """
    <script>
    $(document).ready(function(){
        $(".bfe_cern_movie_thumbnail").each(function(){
            if ( $.trim($(this).html()).length == 0 ) {
                $(this).html('<div style="font-weight: bold; text-align: center; margin-top: 33px;">No preview available</div>');
            }
        });
    });
    </script>
    """
    if nb_max.isdigit():
        nb_max = int(nb_max)
    else:
        nb_max = 10
    video_type = bfo.field('690C_a')
    search_in_coll = 'Video Movies'
    if 'rush' in video_type:
        search_in_coll = 'Video Rushes'
    results = perform_request_search(of="id",
                                     p="recid:{0!s}".format(bfo.recID),
                                     rm="wrd",
                                     c=search_in_coll,
                                     cc=search_in_coll)
    if bfo.recID in results:
        results.remove(bfo.recID)
    if len(results) < nb_max and display_recent_too == 'yes':
        other_results = perform_request_search(of="id",
                                               c=search_in_coll,
                                               cc=search_in_coll)
        if bfo.recID in other_results:
            other_results.remove(bfo.recID)
        results.extend(other_results)
    out += format_records(results[:nb_max], of='hs')
    return out
def _get_institute_pub_dict_fallback(recids, names_list, person_id):
    """return a dictionary consisting of institute -> list of publications"""
    recids = perform_request_search(rg=0, p='exactauthor:"%s"' % str(person_id))
    a = format_records(recids, 'WAPAFF')
    a = [pickle.loads(p) for p in a.split('!---THEDELIMITER---!') if p]
    affdict = {}
    for rec, affs in a:
        keys = affs.keys()
        for name in names_list:
            if name in keys and affs[name][0]:
                try:
                    affdict[affs[name][0]].add(rec)
                except KeyError:
                    affdict[affs[name][0]] = set([rec])
    return affdict
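# The try/except KeyError pattern above groups record ids per institute; a
# stand-alone sketch (hypothetical data) of the same grouping written with
# dict.setdefault():
affdict = {}
for rec, inst in [(1, 'CERN'), (2, 'CERN'), (3, 'DESY')]:
    affdict.setdefault(inst, set()).add(rec)
assert affdict == {'CERN': set([1, 2]), 'DESY': set([3])}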
def _get_institute_pubs_dict(recids, names_list):
    a = format_records(recids, 'WAPAFF')
    a = [deserialize(p) for p in a.strip().split('!---THEDELIMITER---!') if p]
    affdict = {}
    for rec, affs in a:
        keys = affs.keys()
        for name in names_list:
            if name in keys and affs[name][0]:
                for aff in affs[name]:
                    try:
                        affdict[aff].add(rec)
                    except KeyError:
                        affdict[aff] = set([rec])
    # the serialization function (msgpack.packb) cannot serialize a python set
    for key in affdict.keys():
        affdict[key] = list(affdict[key])
    return affdict
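# A quick stand-alone check (assuming the msgpack-python package provides the
# packb/unpackb used as the serializer here) of why the sets above are
# converted to lists before returning: msgpack cannot pack a Python set, but
# packs the same data once it is a list.
import msgpack

try:
    msgpack.packb({'CERN': set([1, 2])})
except TypeError:
    pass                                   # sets are not a msgpack type
packed = msgpack.packb({'CERN': [1, 2]})
assert msgpack.unpackb(packed) == {'CERN': [1, 2]}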
def _get_institute_pub_dict_bai(recids, names_list, person_id):
    """return a dictionary consisting of institute -> list of publications"""
    try:
        cid = get_canonical_id_from_personid(person_id)[0][0]
    except IndexError:
        cid = person_id
    recids = perform_request_search(rg=0, p='author:%s' % str(cid))
    a = format_records(recids, 'WAPAFF')
    a = [pickle.loads(p) for p in a.split('!---THEDELIMITER---!') if p]
    affdict = {}
    for rec, affs in a:
        keys = affs.keys()
        for name in names_list:
            if name in keys and affs[name][0]:
                try:
                    affdict[affs[name][0]].add(rec)
                except KeyError:
                    affdict[affs[name][0]] = set([rec])
    return affdict
def _get_coauthors_fallback(collabs, person_id):
    exclude_recs = []
    if collabs:
        query = 'exactauthor:"%s" and (%s)' % (person_id,
                ' or '.join([('collaboration:"%s"' % x) for x in zip(*collabs)[0]]))
        exclude_recs = perform_request_search(rg=0, p=query)
    recids = perform_request_search(rg=0, p='exactauthor:"%s"' % str(person_id))
    recids = list(set(recids) - set(exclude_recs))
    a = format_records(recids, 'WAPAFF')
    a = [deserialize(p) for p in a.strip().split('!---THEDELIMITER---!') if p]
    coauthors = {}
    for rec, affs in a:
        keys = affs.keys()
        for n in keys:
            try:
                coauthors[n].add(rec)
            except KeyError:
                coauthors[n] = set([rec])
    coauthors = [(x, x, len(coauthors[x])) for x in coauthors
                 if x.lower() != person_id.lower()]
    return coauthors
def search(self, read_cache=True, **kwparams):
    """
    Returns records corresponding to the given search query.

    See docstring of invenio.search_engine.perform_request_search()
    for an overview of available parameters.

    @raise InvenioConnectorAuthError: if authentication fails
    """
    parse_results = False
    of = kwparams.get('of', "")
    if of == "":
        parse_results = True
        of = "xm"
        kwparams['of'] = of
    params = urllib.urlencode(kwparams, doseq=1)

    # Are we running locally? If so, better access the search engine directly
    if self.local and of != 't':
        # See if the user tries to search any restricted collection
        c = kwparams.get('c', "")
        if c != "":
            if type(c) is list:
                colls = c
            else:
                colls = [c]
            for collection in colls:
                if collection_restricted_p(collection):
                    if self.user:
                        self._check_credentials()
                        continue
                    raise InvenioConnectorAuthError("You are trying to search a restricted collection. Please authenticate yourself.\n")
        kwparams['of'] = 'id'
        results = perform_request_search(**kwparams)
        if of.lower() != 'id':
            results = format_records(results, of)
    else:
        if not self.cached_queries.has_key(params + str(parse_results)) or not read_cache:
            if self.user:
                results = self.browser.open(self.server_url + "/search?" + params)
            else:
                results = urllib2.urlopen(self.server_url + "/search?" + params)
            if 'youraccount/login' in results.geturl():
                # Current user not able to search collection
                raise InvenioConnectorAuthError("You are trying to search a restricted collection. Please authenticate yourself.\n")
        else:
            return self.cached_queries[params + str(parse_results)]

    if parse_results:
        # FIXME: we should not try to parse if results is a string
        parsed_records = self._parse_results(results, self.cached_records)
        self.cached_queries[params + str(parse_results)] = parsed_records
        return parsed_records
    else:
        # pylint: disable=E1103
        # The whole point of the following code is to make sure we can
        # handle two types of variable.
        try:
            res = results.read()
        except AttributeError:
            res = results
        # pylint: enable=E1103
        if of == "id":
            try:
                if type(res) is str:
                    # Transform to list
                    res = [int(recid.strip()) for recid in
                           res.strip("[]").split(",") if recid.strip() != ""]
                res.reverse()
            except (ValueError, AttributeError):
                res = []
        self.cached_queries[params + str(parse_results)] = res
        return self.cached_queries[params + str(parse_results)]
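# A minimal usage sketch of this search() method (assumption: the class is
# InvenioConnector from invenio.invenio_connector; adjust the import to your
# installation). With of="id" the textual id list is parsed into a Python
# list of ints; with of left empty the records are fetched as MARCXML ("xm")
# and parsed into record structures.
from invenio.invenio_connector import InvenioConnector

cds = InvenioConnector("http://cds.cern.ch")
recids = cds.search(p="higgs boson", of="id", rg=5)   # e.g. [1234, 1233, ...]
records = cds.search(p="higgs boson", rg=5)           # parsed MARCXML records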
def __call__(self, req, form):
    """RSS 2.0 feed service."""

    # Keep only interesting parameters for the search
    default_params = websearch_templates.rss_default_urlargd
    # We need to keep 'jrec' and 'rg' here in order to have
    # 'multi-page' RSS. These parameters are not kept by default
    # as we don't want to consider them when building RSS links
    # from search and browse pages.
    default_params.update({'jrec': (int, 1),
                           'rg': (int, CFG_WEBSEARCH_INSTANT_BROWSE_RSS)})
    argd = wash_urlargd(form, default_params)
    user_info = collect_user_info(req)

    for coll in argd['c'] + [argd['cc']]:
        if collection_restricted_p(coll):
            (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll)
            if auth_code and user_info['email'] == 'guest':
                cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection': coll})
                target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                         make_canonical_urlargd({'action': cookie,
                                                 'ln': argd['ln'],
                                                 'referer': CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
                return redirect_to_url(req, target, norobot=True)
            elif auth_code:
                return page_not_authorized(req, "../",
                                           text=auth_msg,
                                           navmenuid='search')

    # Create a standard filename with these parameters
    current_url = websearch_templates.build_rss_url(argd)
    cache_filename = current_url.split('/')[-1]
    # In the same way as previously, add 'jrec' & 'rg'

    req.content_type = "application/rss+xml"
    req.send_http_header()
    try:
        # Try to read from cache
        path = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename)
        # Check if cache needs refresh
        filedesc = open(path, "r")
        last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(path)).st_mtime)
        assert(datetime.datetime.now() < last_update_time + datetime.timedelta(minutes=CFG_WEBSEARCH_RSS_TTL))
        c_rss = filedesc.read()
        filedesc.close()
        req.write(c_rss)
        return
    except Exception, e:
        # do it live and cache
        previous_url = None
        if argd['jrec'] > 1:
            prev_jrec = argd['jrec'] - argd['rg']
            if prev_jrec < 1:
                prev_jrec = 1
            previous_url = websearch_templates.build_rss_url(argd, jrec=prev_jrec)

        # Check if the user has rights to set a high wildcard limit;
        # if not, reduce the limit set by the user to the default one
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
            if acc_authorize_action(req, 'runbibedit')[0] != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

        req.argd = argd
        recIDs = perform_request_search(req, of="id",
                                        c=argd['c'], cc=argd['cc'],
                                        p=argd['p'], f=argd['f'],
                                        p1=argd['p1'], f1=argd['f1'],
                                        m1=argd['m1'], op1=argd['op1'],
                                        p2=argd['p2'], f2=argd['f2'],
                                        m2=argd['m2'], op2=argd['op2'],
                                        p3=argd['p3'], f3=argd['f3'],
                                        m3=argd['m3'], wl=argd['wl'])
        nb_found = len(recIDs)
        next_url = None
        if len(recIDs) >= argd['jrec'] + argd['rg']:
            next_url = websearch_templates.build_rss_url(argd, jrec=(argd['jrec'] + argd['rg']))
        first_url = websearch_templates.build_rss_url(argd, jrec=1)
        last_url = websearch_templates.build_rss_url(argd, jrec=nb_found - argd['rg'] + 1)

        recIDs = recIDs[-argd['jrec']:(-argd['rg'] - argd['jrec']):-1]

        rss_prologue = '<?xml version="1.0" encoding="UTF-8"?>\n' + \
                       websearch_templates.tmpl_xml_rss_prologue(current_url=current_url,
                                                                 previous_url=previous_url,
                                                                 next_url=next_url,
                                                                 first_url=first_url,
                                                                 last_url=last_url,
                                                                 nb_found=nb_found,
                                                                 jrec=argd['jrec'],
                                                                 rg=argd['rg'],
                                                                 cc=argd['cc']) + '\n'
        req.write(rss_prologue)
        rss_body = format_records(recIDs,
                                  of='xr',
                                  ln=argd['ln'],
                                  user_info=user_info,
                                  record_separator="\n",
                                  req=req, epilogue="\n")
        rss_epilogue = websearch_templates.tmpl_xml_rss_epilogue() + '\n'
        req.write(rss_epilogue)

        # update cache
        dirname = "%s/rss" % (CFG_CACHEDIR)
        mymkdir(dirname)
        fullfilename = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename)
        try:
            # Remove the file just in case it already existed
            # so that a bit of space is created
            os.remove(fullfilename)
        except OSError:
            pass
        # Check if there's enough space to cache the request.
        if len(os.listdir(dirname)) < CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS:
            try:
                os.umask(022)
                with open(fullfilename, "w") as fd:
                    fd.write(rss_prologue + rss_body + rss_epilogue)
            except IOError as v:
                if v[0] == 36:
                    # URL was too long. Never mind, don't cache
                    pass
                else:
                    raise
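# A stand-alone sketch (toy data, not part of the handler) of the reversed
# slice used above to cut one RSS 'page' out of the hitset:
# perform_request_search() returns record ids in ascending order, so for
# jrec=1 and rg=10 the slice recIDs[-jrec:(-rg - jrec):-1] yields the ten
# newest ids, newest first.
recIDs = list(range(1, 101))               # pretend hitset of ascending ids
jrec, rg = 1, 10
page = recIDs[-jrec:(-rg - jrec):-1]
assert page == list(range(100, 90, -1))    # [100, 99, ..., 91]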
def search(self, p="", f="", c="", rg=10, sf="", so="d", sp="", rm="", of="", ot="", p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="", jrec=0, recid=-1, recidb=-1, d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", ap=0, read_cache=True): """ Returns records corresponding to the given search query. """ parse_results = False if of == "": parse_results = True of = "xm" params = { 'p': p, 'f': f, 'c': c, 'rg': rg, 'sf': sf, 'so': so, 'sp': sp, 'rm': rm, 'of': of, 'p1': p1, 'f1': f1, 'm1': m1, 'op1': op1, 'p2': p2, 'f2': f2, 'm2': m2, 'op2': op2, 'p3': p3, 'f3': f3, 'm3': m3, 'jrec': jrec, 'd1': d1, 'd1y': d1y, 'd1m': d1m, 'd1d': d1d, 'd2': d2, 'd2y': d2y, 'd2m': d2m, 'd2d': d2d, 'dt': dt, 'ap': ap, 'recid': recid, 'recidb': recidb, 'ot': ot } if recid == -1: del params['recid'] if recidb == -1: del params['recidb'] params = urllib.urlencode(params) # Are we running locally? If so, better directly access the # search engine directly if LOCAL_SITE_URL == self.server_url and \ of != 't': results = perform_request_search(p=p, f=f, c=c, rg=rg, sf=sf, so=so, sp=so, rm=rm, p1=p1, f1=f1, m1=m1, op1=op1, p2=p2, f2=f2, m2=m2, op2=op2, p3=p3, f3=f3, m3=m3, jrec=jrec, recid=recid, recidb=recidb, of='id', ot=ot, d1=d1, d1y=d1y, d1m=d1m, d1d=d1d, d2=d2, d2y=d2y, d2m=d2m, d2d=d2d, dt=dt, ap=ap) if of.lower() != 'id': results = format_records(results, of) else: if not self.cached_queries.has_key( params + str(parse_results)) or not read_cache: results = urllib2.urlopen(self.server_url + "/search?" + params) else: return self.cached_queries[params + str(parse_results)] if parse_results: # FIXME: we should not try to parse if results is string parsed_records = self._parse_results(results, self.cached_records) self.cached_queries[params + str(parse_results)] = parsed_records return parsed_records else: # pylint: disable=E1103 # The whole point of the following code is to make sure we can # handle two types of variable. try: res = results.read() except AttributeError: res = results # pylint: enable=E1103 if of == "id": if type(res) is str: # Transform to list res = [int(recid.strip()) for recid in \ res.strip("[]").split(",") if recid.strip() != ""] res.reverse() self.cached_queries[params + str(parse_results)] = res return self.cached_queries[params + str(parse_results)]
def search(self, p="", f="", c="", rg=10, sf="", so="d", sp="", rm="", of="", ot="", p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="", jrec=0, recid=-1, recidb=-1, d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", ap=0, read_cache=True): """ Returns records corresponding to the given search query. @raise InvenioConnectorAuthError: if authentication fails """ parse_results = False if of == "": parse_results = True of = "xm" params = {'p': p, 'f': f, 'c': c, 'rg': rg, 'sf': sf, 'so': so, 'sp': sp, 'rm': rm, 'of': of, 'p1':p1, 'f1': f1, 'm1': m1, 'op1': op1, 'p2': p2, 'f2': f2, 'm2': m2, 'op2': op2, 'p3': p3, 'f3': f3, 'm3': m3, 'jrec':jrec, 'd1': d1, 'd1y':d1y, 'd1m': d1m, 'd1d': d1d, 'd2': d2, 'd2y': d2y, 'd2m': d2m, 'd2d': d2d, 'dt': dt, 'ap': ap , 'recid': recid, 'recidb': recidb, 'ot': ot} if recid == -1: del params['recid'] if recidb == -1: del params['recidb'] params = urllib.urlencode(params, doseq=1) # Are we running locally? If so, better directly access the # search engine directly if self.server_url in LOCAL_SITE_URLS and of != 't': # See if user tries to search any restricted collection if c != "": if type(c) is list: colls = c else: colls = [c] for collection in colls: if collection_restricted_p(collection): if self.user: self._check_credentials() continue raise InvenioConnectorAuthError("You are trying to search a restricted collection. Please authenticate yourself.\n") results = perform_request_search(p=p, f=f, c=c, rg=rg, sf=sf, so=so, sp=so, rm=rm, p1=p1, f1=f1, m1=m1, op1=op1, p2=p2, f2=f2, m2=m2, op2=op2, p3=p3, f3=f3, m3=m3, jrec=jrec, recid=recid, recidb=recidb, of='id', ot=ot, d1=d1, d1y=d1y, d1m=d1m, d1d=d1d, d2=d2, d2y=d2y, d2m=d2m, d2d=d2d, dt=dt, ap=ap) if of.lower() != 'id': results = format_records(results, of) else: if not self.cached_queries.has_key(params + str(parse_results)) or not read_cache: if self.user: results = self.browser.open(self.server_url + "/search?" + params) else: results = urllib2.urlopen(self.server_url + "/search?" + params) if 'youraccount/login' in results.geturl(): # Current user not able to search collection raise InvenioConnectorAuthError("You are trying to search a restricted collection. Please authenticate yourself.\n") else: return self.cached_queries[params + str(parse_results)] if parse_results: # FIXME: we should not try to parse if results is string parsed_records = self._parse_results(results, self.cached_records) self.cached_queries[params + str(parse_results)] = parsed_records return parsed_records else: # pylint: disable=E1103 # The whole point of the following code is to make sure we can # handle two types of variable. try: res = results.read() except AttributeError: res = results # pylint: enable=E1103 if of == "id": try: if type(res) is str: # Transform to list res = [int(recid.strip()) for recid in \ res.strip("[]").split(",") if recid.strip() != ""] res.reverse() except (ValueError, AttributeError): res = [] self.cached_queries[params + str(parse_results)] = res return self.cached_queries[params + str(parse_results)]
def search(self, p="", f="", c="", rg=10, sf="", so="d", sp="", rm="", of="", ot="", p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="", jrec=0, recid=-1, recidb=-1, d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", ap=0, read_cache=True): """ Returns records corresponding to the given search query. """ parse_results = False if of == "": parse_results = True of = "xm" params = {'p': p, 'f': f, 'c': c, 'rg': rg, 'sf': sf, 'so': so, 'sp': sp, 'rm': rm, 'of': of, 'p1':p1, 'f1': f1, 'm1': m1, 'op1': op1, 'p2': p2, 'f2': f2, 'm2': m2, 'op2': op2, 'p3': p3, 'f3': f3, 'm3': m3, 'jrec':jrec, 'd1': d1, 'd1y':d1y, 'd1m': d1m, 'd1d': d1d, 'd2': d2, 'd2y': d2y, 'd2m': d2m, 'd2d': d2d, 'dt': dt, 'ap': ap , 'recid': recid, 'recidb': recidb, 'ot': ot} if recid == -1: del params['recid'] if recidb == -1: del params['recidb'] params = urllib.urlencode(params) # Are we running locally? If so, better directly access the # search engine directly if LOCAL_SITE_URL == self.server_url and \ of != 't': results = perform_request_search(p=p, f=f, c=c, rg=rg, sf=sf, so=so, sp=so, rm=rm, p1=p1, f1=f1, m1=m1, op1=op1, p2=p2, f2=f2, m2=m2, op2=op2, p3=p3, f3=f3, m3=m3, jrec=jrec, recid=recid, recidb=recidb, of='id', ot=ot, d1=d1, d1y=d1y, d1m=d1m, d1d=d1d, d2=d2, d2y=d2y, d2m=d2m, d2d=d2d, dt=dt, ap=ap) if of.lower() != 'id': results = format_records(results, of) else: if not self.cached_queries.has_key(params + str(parse_results)) or not read_cache: results = urllib2.urlopen(self.server_url + "/search?" + params) else: return self.cached_queries[params + str(parse_results)] if parse_results: # FIXME: we should not try to parse if results is string parsed_records = self._parse_results(results, self.cached_records) self.cached_queries[params + str(parse_results)] = parsed_records return parsed_records else: # pylint: disable=E1103 # The whole point of the following code is to make sure we can # handle two types of variable. try: res = results.read() except AttributeError: res = results # pylint: enable=E1103 if of == "id": if type(res) is str: # Transform to list res = [int(recid.strip()) for recid in \ res.strip("[]").split(",") if recid.strip() != ""] res.reverse() self.cached_queries[params + str(parse_results)] = res return self.cached_queries[params + str(parse_results)]
def search(self, p="", f="", c="", rg=10, sf="", so="d", sp="", rm="", of="", ot="", p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="", jrec=0, recid=-1, recidb=-1, d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", ap=0, read_cache=True): """ Returns records corresponding to the given search query. @raise InvenioConnectorAuthError: if authentication fails """ parse_results = False if of == "": parse_results = True of = "xm" params = { 'p': p, 'f': f, 'c': c, 'rg': rg, 'sf': sf, 'so': so, 'sp': sp, 'rm': rm, 'of': of, 'p1': p1, 'f1': f1, 'm1': m1, 'op1': op1, 'p2': p2, 'f2': f2, 'm2': m2, 'op2': op2, 'p3': p3, 'f3': f3, 'm3': m3, 'jrec': jrec, 'd1': d1, 'd1y': d1y, 'd1m': d1m, 'd1d': d1d, 'd2': d2, 'd2y': d2y, 'd2m': d2m, 'd2d': d2d, 'dt': dt, 'ap': ap, 'recid': recid, 'recidb': recidb, 'ot': ot } if recid == -1: del params['recid'] if recidb == -1: del params['recidb'] params = urllib.urlencode(params, doseq=1) # Are we running locally? If so, better directly access the # search engine directly if self.local and of != 't': # See if user tries to search any restricted collection if c != "": if type(c) is list: colls = c else: colls = [c] for collection in colls: if collection_restricted_p(collection): if self.user: self._check_credentials() continue raise InvenioConnectorAuthError( "You are trying to search a restricted collection. Please authenticate yourself.\n" ) results = perform_request_search(p=p, f=f, c=c, rg=rg, sf=sf, so=so, sp=so, rm=rm, p1=p1, f1=f1, m1=m1, op1=op1, p2=p2, f2=f2, m2=m2, op2=op2, p3=p3, f3=f3, m3=m3, jrec=jrec, recid=recid, recidb=recidb, of='id', ot=ot, d1=d1, d1y=d1y, d1m=d1m, d1d=d1d, d2=d2, d2y=d2y, d2m=d2m, d2d=d2d, dt=dt, ap=ap) if of.lower() != 'id': results = format_records(results, of) else: if not self.cached_queries.has_key( params + str(parse_results)) or not read_cache: if self.user: results = self.browser.open(self.server_url + "/search?" + params) else: results = urllib2.urlopen(self.server_url + "/search?" + params) if 'youraccount/login' in results.geturl(): # Current user not able to search collection raise InvenioConnectorAuthError( "You are trying to search a restricted collection. Please authenticate yourself.\n" ) else: return self.cached_queries[params + str(parse_results)] if parse_results: # FIXME: we should not try to parse if results is string parsed_records = self._parse_results(results, self.cached_records) self.cached_queries[params + str(parse_results)] = parsed_records return parsed_records else: # pylint: disable=E1103 # The whole point of the following code is to make sure we can # handle two types of variable. try: res = results.read() except AttributeError: res = results # pylint: enable=E1103 if of == "id": try: if type(res) is str: # Transform to list res = [int(recid.strip()) for recid in \ res.strip("[]").split(",") if recid.strip() != ""] res.reverse() except (ValueError, AttributeError): res = [] self.cached_queries[params + str(parse_results)] = res return self.cached_queries[params + str(parse_results)]
def cvify_records(recids, of, req=None, so='d'):
    """
    Write a CV for records RECIDS in the format OF.
    REQ is the Apache/mod_python request object.
    """
    # intbitsets don't support indexing, so we need a list from our hitset first
    recids = [hit for hit in recids]
    if so == 'd':
        recids.reverse()
    if of.startswith('h'):
        if of == 'hcv':
            format_records(recids, of=of,
                           record_prefix=lambda count: '%d) ' % (count + 1),
                           req=req)
        elif of == 'htcv':
            format_records(recids, of=of,
                           record_prefix=lambda count: '%d) ' % (count + 1),
                           req=req)
    elif of == 'tlcv':
        HEADER = r'''
\documentclass{article}
%%To use pdflatex, uncomment these lines, as well as the \href lines
%%in each entry
%%\usepackage[pdftex,
%%        colorlinks=true,
%%        urlcolor=blue,       %% \href{...}{...} external (URL)
%%        filecolor=green,     %% \href{...} local file
%%        linkcolor=red,       %% \ref{...} and \pageref{...}
%%        pdftitle={Papers by AUTHOR},
%%        pdfauthor={Your Name},
%%        pdfsubject={Just a test},
%%        pdfkeywords={test testing testable},
%%        pagebackref,
%%        pdfpagemode=None,
%%        bookmarksopen=true]{hyperref}
%%\usepackage{arial}
%%\renewcommand{\familydefault}{\sfdefault} %% Sans serif
\renewcommand{\labelenumii}{\arabic{enumi}.\arabic{enumii}}
\pagestyle{empty}
\oddsidemargin 0.0in
\textwidth 6.5in
\topmargin -0.75in
\textheight 9.5in
\begin{document}
\title{Papers by AUTHOR}
\author{}
\date{}
\maketitle
\begin{enumerate}
%%%% LIST OF PAPERS
%%%% Please comment out anything between here and the
%%%% first \item
%%%% Please send any updates or corrections to the list to
%%%% %(email)s
''' % {'email': CFG_SITE_SUPPORT_EMAIL}
        FOOTER = r'''
\end{enumerate}
\end{document}
'''
        format_records(recids, of=of, prologue=HEADER, epilogue=FOOTER, req=req)
    return ''
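# A stand-alone sketch (toy data; format_records itself is not re-implemented
# here, and the 0-based counter is an assumption about how format_records
# invokes the callable) of what the record_prefix lambda above produces:
record_prefix = lambda count: '%d) ' % (count + 1)
formatted = ['Paper A', 'Paper B']
numbered = [record_prefix(i) + rec for i, rec in enumerate(formatted)]
assert numbered == ['1) Paper A', '2) Paper B']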