def format_element(bfo, separator='; ', nbOnly='no', searchlink='no'):
    """
    Prints the records (or number of records) citing this record

    DO NOT USE -- for testing only, not deployed on cdsweb
    @param nbOnly: if 'yes', print only the number of citing records
    @param searchlink: if 'yes' (and nbOnly is 'yes'), print the number as a link to a search for these records
    @param separator: separator between citations (currently unused; records are joined with <br/>)
    """
    from urllib import quote

    from invenio.config import CFG_SITE_URL
    # search_pattern is assumed to come from the Invenio search engine
    from invenio.search_engine import search_pattern

    primary_report_numbers = bfo.fields('037__a')
    additional_report_numbers = bfo.fields('088__a')

    report_numbers = primary_report_numbers
    report_numbers.extend(additional_report_numbers)
    report_numbers = [quote(rep_num) for rep_num in report_numbers]

    res = []
    for rep_num in report_numbers:
        res.extend(list(search_pattern(p=rep_num, f='999C5r')))

    if nbOnly.lower() == 'yes':
        if searchlink.lower() == 'yes':
            return '<a href="' + CFG_SITE_URL + '/search?p=recid:' + \
                   bfo.control_field('001') + '&amp;rm=citation">' + \
                   str(len(res)) + '</a>'
        else:
            return str(len(res))
    else:
        from invenio.bibformat import format_records
        return '<br/>'.join(format_records(res, 'hs'))
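# Hypothetical smoke test for the element above (not part of the original
# file); it assumes a running Invenio installation, an existing record id,
# and that BibFormatObject is importable from invenio.bibformat_engine.
from invenio.bibformat_engine import BibFormatObject

bfo = BibFormatObject(12345)                                 # any existing record id
print format_element(bfo, nbOnly='yes', searchlink='yes')    # linked citation count
print format_element(bfo)                                    # 'hs'-formatted citing records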
def _get_coauthors_fallback(personid, collabs):
    # Assumes module-level imports of pickle, perform_request_search
    # (invenio.search_engine) and format_records (invenio.bibformat).
    # python 2.4 does not support max() with key argument.
    # Please remove this function when python 2.6 is supported.
    def max_key(iterable, key):
        try:
            ret = iterable[0]
        except IndexError:
            return None
        for i in iterable[1:]:
            if key(i) > key(ret):
                ret = i
        return ret

    if collabs:
        query = 'exactauthor:"%s" and (%s)' % (personid, ' or '.join([('collaboration:"%s"' % x) for x in zip(*collabs)[0]]))
        exclude_recs = perform_request_search(rg=0, p=query)
    else:
        exclude_recs = []

    recids = perform_request_search(rg=0, p='exactauthor:"%s"' % str(personid))
    recids = list(set(recids) - set(exclude_recs))
    a = format_records(recids, 'WAPAFF')
    a = [pickle.loads(p) for p in a.split('!---THEDELIMITER---!') if p]
    coauthors = {}
    for rec, affs in a:
        keys = affs.keys()
        for n in keys:
            try:
                coauthors[n].add(rec)
            except KeyError:
                coauthors[n] = set([rec])

    coauthors = [(x, x, len(coauthors[x])) for x in coauthors if x.lower() != personid.lower()]
    return coauthors
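# Self-contained illustration (not from the original module) of the coauthor
# aggregation above; the (recid, {author_name: [affiliations]}) shape of the
# deserialized 'WAPAFF' payload is an assumption inferred from the code.
sample = [
    (11, {'Doe, J.': ['CERN'], 'Smith, A.': ['MIT']}),
    (12, {'Doe, J.': ['CERN'], 'Roe, B.': ['DESY']}),
    (13, {'Doe, J.': ['CERN'], 'Smith, A.': ['MIT']}),
]
coauthors_demo = {}
for rec, affs in sample:
    for name in affs:
        coauthors_demo.setdefault(name, set()).add(rec)
person = 'Doe, J.'
print sorted((name, name, len(recs)) for name, recs in coauthors_demo.items()
             if name.lower() != person.lower())
# [('Roe, B.', 'Roe, B.', 1), ('Smith, A.', 'Smith, A.', 2)]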
def _get_pubs_per_year_fallback(person_id):
    '''
    Returns a dict consisting of: year -> number of publications in that year (given a personID).
    @param person_id: int personid
    @return [{'year':no_of_publications}, bool]
    '''
    recids = perform_request_search(rg=0, p='exactauthor:"%s"' % str(person_id))
    a = format_records(recids, 'WAPDAT')
    a = [deserialize(p) for p in a.strip().split('!---THEDELIMITER---!') if p]
    return _get_pubs_per_year_dictionary(a)
def format_element(bfo, display_recent_too='no', nb_max='10'):
    """
    Returns a list of similar movies.
    If there are none, and display_recent_too == 'yes', returns most recent movies
    @param display_recent_too if 'yes' and not similar movie, display most recent movies
    @param more_link if 'yes', print link to video collection
    """

    out = """
<script>
    $(document).ready(function(){
        $(".bfe_cern_movie_thumbnail").each(function(){
            if ( $.trim($(this).html()).length == 0 ) {
                $(this).html('<div style="font-weight: bold; text-align: center; margin-top: 33px;\">No preview available</div>');
            }
        });
    });
</script>
    """

    if nb_max.isdigit():
        nb_max = int(nb_max)
    else:
        nb_max = 10

    video_type = bfo.field('690C_a')

    search_in_coll = 'Video Movies'
    if 'rush' in video_type:
        search_in_coll = 'Video Rushes'

    results = perform_request_search(
        of="id",
        p="recid:{0!s}".format(bfo.recID),
        rm="wrd",
        c=search_in_coll,
        cc=search_in_coll
    )

    if bfo.recID in results:
        results.remove(bfo.recID)

    if len(results) < nb_max and display_recent_too == 'yes':
        other_results = perform_request_search(
            of="id",
            c=search_in_coll,
            cc=search_in_coll
        )
        if bfo.recID in other_results:
            other_results.remove(bfo.recID)
        results.extend(other_results)

    out += format_records(results[:nb_max], of='hs')

    return out
def _get_institute_pub_dict_fallback(recids, names_list, person_id):
    """return a dictionary consisting of institute -> list of publications"""

    recids = perform_request_search(rg=0, p='exactauthor:"%s"' % str(person_id))
    a = format_records(recids, 'WAPAFF')
    a = [pickle.loads(p) for p in a.split('!---THEDELIMITER---!') if p]
    affdict = {}
    for rec, affs in a:
        keys = affs.keys()
        for name in names_list:
            if name in keys and affs[name][0]:
                try:
                    affdict[affs[name][0]].add(rec)
                except KeyError:
                    affdict[affs[name][0]] = set([rec])
    return affdict
def _get_institute_pubs_dict(recids, names_list):
    """Return a dictionary mapping affiliation -> list of record IDs."""
    a = format_records(recids, 'WAPAFF')
    a = [deserialize(p) for p in a.strip().split('!---THEDELIMITER---!') if p]
    affdict = {}
    for rec, affs in a:
        keys = affs.keys()
        for name in names_list:
            if name in keys and affs[name][0]:
                for aff in affs[name]:
                    try:
                        affdict[aff].add(rec)
                    except KeyError:
                        affdict[aff] = set([rec])
    # the serialization function (msgpack.packb) cannot serialize a python set
    for key in affdict.keys():
        affdict[key] = list(affdict[key])
    return affdict
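# Self-contained illustration (not from the original module) of the
# institute -> publications aggregation above; the deserialized 'WAPAFF'
# shape (recid, {author_name: [affiliations]}) is an assumption inferred
# from the code.
sample = [
    (21, {'Doe, J.': ['CERN', 'U. Geneva']}),
    (22, {'Doe, J.': ['CERN']}),
    (23, {'Smith, A.': ['MIT']}),
]
names_list = ['Doe, J.']
affdict_demo = {}
for rec, affs in sample:
    for name in names_list:
        if name in affs and affs[name]:
            for aff in affs[name]:
                affdict_demo.setdefault(aff, set()).add(rec)
# lists instead of sets, mirroring the msgpack-friendly conversion above
print dict((aff, sorted(recs)) for aff, recs in affdict_demo.items())
# {'CERN': [21, 22], 'U. Geneva': [21]}  (key order may vary)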
def _get_institute_pub_dict_bai(recids, names_list, person_id):
    """return a dictionary consisting of institute -> list of publications"""
    try:
        cid = get_canonical_id_from_personid(person_id)[0][0]
    except IndexError:
        cid = person_id
    recids = perform_request_search(rg=0, p='author:%s' % str(cid))
    a = format_records(recids, 'WAPAFF')
    a = [pickle.loads(p) for p in a.split('!---THEDELIMITER---!') if p]
    affdict = {}
    for rec, affs in a:
        keys = affs.keys()
        for name in names_list:
            if name in keys and affs[name][0]:
                try:
                    affdict[affs[name][0]].add(rec)
                except KeyError:
                    affdict[affs[name][0]] = set([rec])
    return affdict
def _get_coauthors_fallback(collabs, person_id):
    """Return a list of (name, name, number of shared records) tuples for the
    person's coauthors, excluding records matched via collaborations."""
    exclude_recs = []
    if collabs:
        query = 'exactauthor:"%s" and (%s)' % (person_id, ' or '.join([('collaboration:"%s"' % x) for x in zip(*collabs)[0]]))
        exclude_recs = perform_request_search(rg=0, p=query)
    recids = perform_request_search(rg=0, p='exactauthor:"%s"' % str(person_id))
    recids = list(set(recids) - set(exclude_recs))
    a = format_records(recids, 'WAPAFF')
    a = [deserialize(p) for p in a.strip().split('!---THEDELIMITER---!') if p]
    coauthors = {}
    for rec, affs in a:
        keys = affs.keys()
        for n in keys:
            try:
                coauthors[n].add(rec)
            except KeyError:
                coauthors[n] = set([rec])

    coauthors = [(x, x, len(coauthors[x])) for x in coauthors if x.lower() != person_id.lower()]
    return coauthors
    def search(self, read_cache=True, **kwparams):
        """
        Returns records corresponding to the given search query.

        See docstring of invenio.search_engine.perform_request_search()
        for an overview of available parameters.

        @raise InvenioConnectorAuthError: if authentication fails
        """
        parse_results = False
        of = kwparams.get('of', "")
        if of == "":
            parse_results = True
            of = "xm"
            kwparams['of'] = of
        params = urllib.urlencode(kwparams, doseq=1)

        # Are we running locally? If so, better to access the
        # search engine directly
        if self.local and of != 't':
            # See if user tries to search any restricted collection
            c = kwparams.get('c', "")
            if c != "":
                if type(c) is list:
                    colls = c
                else:
                    colls = [c]
                for collection in colls:
                    if collection_restricted_p(collection):
                        if self.user:
                            self._check_credentials()
                            continue
                        raise InvenioConnectorAuthError("You are trying to search a restricted collection. Please authenticate yourself.\n")
            kwparams['of'] = 'id'
            results = perform_request_search(**kwparams)
            if of.lower() != 'id':
                results = format_records(results, of)
        else:
            if not self.cached_queries.has_key(params + str(parse_results)) or not read_cache:
                if self.user:
                    results = self.browser.open(self.server_url + "/search?" + params)
                else:
                    results = urllib2.urlopen(self.server_url + "/search?" + params)
                if 'youraccount/login' in results.geturl():
                    # Current user not able to search collection
                    raise InvenioConnectorAuthError("You are trying to search a restricted collection. Please authenticate yourself.\n")
            else:
                return self.cached_queries[params + str(parse_results)]

        if parse_results:
            # FIXME: we should not try to parse if results is string
            parsed_records = self._parse_results(results, self.cached_records)
            self.cached_queries[params + str(parse_results)] = parsed_records
            return parsed_records
        else:
            # pylint: disable=E1103
            # The whole point of the following code is to make sure we can
            # handle two types of variable.
            try:
                res = results.read()
            except AttributeError:
                res = results
            # pylint: enable=E1103

            if of == "id":
                try:
                    if type(res) is str:
                        # Transform to list
                        res = [int(recid.strip()) for recid in \
                        res.strip("[]").split(",") if recid.strip() != ""]
                    res.reverse()
                except (ValueError, AttributeError):
                    res = []
            self.cached_queries[params + str(parse_results)] = res
            return self.cached_queries[params + str(parse_results)]
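# Hypothetical usage of the search() method above, assuming it belongs to
# invenio.invenio_connector.InvenioConnector; the URL and queries are
# illustrative only. All search parameters must be passed by keyword, since
# the signature is search(self, read_cache=True, **kwparams).
from invenio.invenio_connector import InvenioConnector

cds = InvenioConnector("http://cds.cern.ch")
recids = cds.search(p='author:"Ellis, J"', of='id')            # list of record ids
marcxml = cds.search(p='reportnumber:CERN-TH-401', of='xm')    # raw MARCXML text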
    def __call__(self, req, form):
        """RSS 2.0 feed service."""

        # Keep only interesting parameters for the search
        default_params = websearch_templates.rss_default_urlargd
        # We need to keep 'jrec' and 'rg' here in order to have
        # 'multi-page' RSS. These parameters are not kept by default
        # as we don't want to consider them when building RSS links
        # from search and browse pages.
        default_params.update({'jrec':(int, 1),
                               'rg': (int, CFG_WEBSEARCH_INSTANT_BROWSE_RSS)})
        argd = wash_urlargd(form, default_params)
        user_info = collect_user_info(req)

        for coll in argd['c'] + [argd['cc']]:
            if collection_restricted_p(coll):
                (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll)
                if auth_code and user_info['email'] == 'guest':
                    cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll})
                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                            make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
                    return redirect_to_url(req, target, norobot=True)
                elif auth_code:
                    return page_not_authorized(req, "../", \
                        text=auth_msg, \
                        navmenuid='search')

        # Create a standard filename with these parameters
        current_url = websearch_templates.build_rss_url(argd)
        cache_filename = current_url.split('/')[-1]

        # In the same way as previously, add 'jrec' & 'rg'

        req.content_type = "application/rss+xml"
        req.send_http_header()
        try:
            # Try to read from cache
            path = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename)
            # Check if cache needs refresh
            filedesc = open(path, "r")
            last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(path)).st_mtime)
            assert(datetime.datetime.now() < last_update_time + datetime.timedelta(minutes=CFG_WEBSEARCH_RSS_TTL))
            c_rss = filedesc.read()
            filedesc.close()
            req.write(c_rss)
            return
        except Exception, e:
            # do it live and cache

            previous_url = None
            if argd['jrec'] > 1:
                prev_jrec = argd['jrec'] - argd['rg']
                if prev_jrec < 1:
                    prev_jrec = 1
                previous_url = websearch_templates.build_rss_url(argd,
                                                                 jrec=prev_jrec)

            # Check whether the user has the right to set a high wildcard limit;
            # if not, cap the user-supplied limit at the configured default.
            if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
                if acc_authorize_action(req, 'runbibedit')[0] != 0:
                    argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

            req.argd = argd
            recIDs = perform_request_search(req, of="id",
                                                          c=argd['c'], cc=argd['cc'],
                                                          p=argd['p'], f=argd['f'],
                                                          p1=argd['p1'], f1=argd['f1'],
                                                          m1=argd['m1'], op1=argd['op1'],
                                                          p2=argd['p2'], f2=argd['f2'],
                                                          m2=argd['m2'], op2=argd['op2'],
                                                          p3=argd['p3'], f3=argd['f3'],
                                                          m3=argd['m3'], wl=argd['wl'])
            nb_found = len(recIDs)
            next_url = None
            if len(recIDs) >= argd['jrec'] + argd['rg']:
                next_url = websearch_templates.build_rss_url(argd,
                                                             jrec=(argd['jrec'] + argd['rg']))

            first_url = websearch_templates.build_rss_url(argd, jrec=1)
            last_url = websearch_templates.build_rss_url(argd, jrec=nb_found - argd['rg'] + 1)

            recIDs = recIDs[-argd['jrec']:(-argd['rg'] - argd['jrec']):-1]

            rss_prologue = '<?xml version="1.0" encoding="UTF-8"?>\n' + \
            websearch_templates.tmpl_xml_rss_prologue(current_url=current_url,
                                                      previous_url=previous_url,
                                                      next_url=next_url,
                                                      first_url=first_url, last_url=last_url,
                                                      nb_found=nb_found,
                                                      jrec=argd['jrec'], rg=argd['rg'],
                                                      cc=argd['cc']) + '\n'
            req.write(rss_prologue)
            rss_body = format_records(recIDs,
                                      of='xr',
                                      ln=argd['ln'],
                                      user_info=user_info,
                                      record_separator="\n",
                                      req=req, epilogue="\n")
            rss_epilogue = websearch_templates.tmpl_xml_rss_epilogue() + '\n'
            req.write(rss_epilogue)

            # update cache
            dirname = "%s/rss" % (CFG_CACHEDIR)
            mymkdir(dirname)
            fullfilename = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename)
            try:
                # Remove the file just in case it already existed
                # so that a bit of space is created
                os.remove(fullfilename)
            except OSError:
                pass

            # Check if there's enough space to cache the request.
            if len(os.listdir(dirname)) < CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS:
                try:
                    os.umask(022)
                    with open(fullfilename, "w") as fd:
                        fd.write(rss_prologue + rss_body + rss_epilogue)
                except IOError as v:
                    if v[0] == 36:
                        # File name (built from the URL) was too long. Never mind, don't cache
                        pass
                    else:
                        raise
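# Minimal, self-contained sketch of the cache-freshness test used above:
# the cached RSS file is served only if it is younger than the TTL (in minutes).
import datetime
import os

def rss_cache_is_fresh(path, ttl_minutes):
    """Return True if the file at `path` is younger than `ttl_minutes`."""
    last_update = datetime.datetime.fromtimestamp(os.stat(path).st_mtime)
    return datetime.datetime.now() < last_update + datetime.timedelta(minutes=ttl_minutes)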
    def search(self, p="", f="", c="", rg=10, sf="", so="d", sp="",
               rm="", of="", ot="", p1="", f1="", m1="", op1="",
               p2="", f2="", m2="", op2="", p3="", f3="", m3="",
               jrec=0, recid=-1, recidb=-1, d1="", d1y=0, d1m=0,
               d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", ap=0,
               read_cache=True):
        """
        Returns records corresponding to the given search query.

        @raise InvenioConnectorAuthError: if authentication fails
        """
        parse_results = False
        if of == "":
            parse_results = True
            of = "xm"

        params = {'p': p, 'f': f, 'c': c, 'rg': rg,
                  'sf': sf, 'so': so, 'sp': sp,
                  'rm': rm, 'of': of,
                  'p1': p1, 'f1': f1, 'm1': m1, 'op1': op1,
                  'p2': p2, 'f2': f2, 'm2': m2, 'op2': op2,
                  'p3': p3, 'f3': f3, 'm3': m3, 'jrec': jrec,
                  'd1': d1, 'd1y': d1y, 'd1m': d1m, 'd1d': d1d,
                  'd2': d2, 'd2y': d2y, 'd2m': d2m, 'd2d': d2d,
                  'dt': dt, 'ap': ap, 'recid': recid, 'recidb': recidb,
                  'ot': ot}
        if recid == -1:
            del params['recid']
        if recidb == -1:
            del params['recidb']
        params = urllib.urlencode(params, doseq=1)

        # Are we running locally? If so, better to access the
        # search engine directly
        if self.server_url in LOCAL_SITE_URLS and of != 't':
            # See if user tries to search any restricted collection
            if c != "":
                if type(c) is list:
                    colls = c
                else:
                    colls = [c]
                for collection in colls:
                    if collection_restricted_p(collection):
                        if self.user:
                            self._check_credentials()
                            continue
                        raise InvenioConnectorAuthError("You are trying to search a restricted collection. Please authenticate yourself.\n")
            results = perform_request_search(p=p, f=f, c=c, rg=rg, sf=sf, so=so, sp=sp, rm=rm,
                                            p1=p1, f1=f1, m1=m1, op1=op1,
                                            p2=p2, f2=f2, m2=m2, op2=op2,
                                            p3=p3, f3=f3, m3=m3, jrec=jrec,
                                            recid=recid, recidb=recidb, of='id', ot=ot,
                                            d1=d1, d1y=d1y, d1m=d1m, d1d=d1d,
                                            d2=d2, d2y=d2y, d2m=d2m, d2d=d2d, dt=dt, ap=ap)
            if of.lower() != 'id':
                results = format_records(results, of)
        else:
            if not self.cached_queries.has_key(params + str(parse_results)) or not read_cache:
                if self.user:
                    results = self.browser.open(self.server_url + "/search?" + params)
                else:
                    results = urllib2.urlopen(self.server_url + "/search?" + params)
                if 'youraccount/login' in results.geturl():
                    # Current user not able to search collection
                    raise InvenioConnectorAuthError("You are trying to search a restricted collection. Please authenticate yourself.\n")
            else:
                return self.cached_queries[params + str(parse_results)]

        if parse_results:
            # FIXME: we should not try to parse if results is string
            parsed_records = self._parse_results(results, self.cached_records)
            self.cached_queries[params + str(parse_results)] = parsed_records
            return parsed_records
        else:
            # pylint: disable=E1103
            # The whole point of the following code is to make sure we can
            # handle two types of variable.
            try:
                res = results.read()
            except AttributeError:
                res = results
            # pylint: enable=E1103

            if of == "id":
                try:
                    if type(res) is str:
                        # Transform to list
                        res = [int(recid.strip()) for recid in \
                        res.strip("[]").split(",") if recid.strip() != ""]
                    res.reverse()
                except (ValueError, AttributeError):
                    res = []
            self.cached_queries[params + str(parse_results)] = res
            return self.cached_queries[params + str(parse_results)]
    def search(self, p="", f="", c="", rg=10, sf="", so="d", sp="",
               rm="", of="", ot="", p1="", f1="", m1="", op1="",
               p2="", f2="", m2="", op2="", p3="", f3="", m3="",
               jrec=0, recid=-1, recidb=-1, d1="", d1y=0, d1m=0,
               d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", ap=0,
               read_cache=True):
        """
        Returns records corresponding to the given search query.
        """
        parse_results = False
        if of == "":
            parse_results = True
            of = "xm"

        params = {'p': p, 'f': f, 'c': c, 'rg': rg,
                  'sf': sf, 'so': so, 'sp': sp,
                  'rm': rm, 'of': of,
                  'p1': p1, 'f1': f1, 'm1': m1, 'op1': op1,
                  'p2': p2, 'f2': f2, 'm2': m2, 'op2': op2,
                  'p3': p3, 'f3': f3, 'm3': m3, 'jrec': jrec,
                  'd1': d1, 'd1y': d1y, 'd1m': d1m, 'd1d': d1d,
                  'd2': d2, 'd2y': d2y, 'd2m': d2m, 'd2d': d2d,
                  'dt': dt, 'ap': ap, 'recid': recid, 'recidb': recidb,
                  'ot': ot}
        if recid == -1:
            del params['recid']
        if recidb == -1:
            del params['recidb']
        params = urllib.urlencode(params)

        # Are we running locally? If so, better to access the
        # search engine directly
        if LOCAL_SITE_URL == self.server_url and \
               of != 't':
            results = perform_request_search(p=p, f=f, c=c, rg=rg, sf=sf, so=so, sp=sp, rm=rm,
                                            p1=p1, f1=f1, m1=m1, op1=op1,
                                            p2=p2, f2=f2, m2=m2, op2=op2,
                                            p3=p3, f3=f3, m3=m3, jrec=jrec,
                                            recid=recid, recidb=recidb, of='id', ot=ot,
                                            d1=d1, d1y=d1y, d1m=d1m, d1d=d1d,
                                            d2=d2, d2y=d2y, d2m=d2m, d2d=d2d, dt=dt, ap=ap)
            if of.lower() != 'id':
                results = format_records(results, of)
        else:
            if not self.cached_queries.has_key(params + str(parse_results)) or not read_cache:
                results = urllib2.urlopen(self.server_url + "/search?" + params)
            else:
                return self.cached_queries[params + str(parse_results)]

        if parse_results:
            # FIXME: we should not try to parse if results is string
            parsed_records = self._parse_results(results, self.cached_records)
            self.cached_queries[params + str(parse_results)] = parsed_records
            return parsed_records
        else:
            # pylint: disable=E1103
            # The whole point of the following code is to make sure we can
            # handle two types of variable.
            try:
                res = results.read()
            except AttributeError:
                res = results
            # pylint: enable=E1103

            if of == "id":
                if type(res) is str:
                    # Transform to list
                    res = [int(recid.strip()) for recid in \
                    res.strip("[]").split(",") if recid.strip() != ""]
                res.reverse()
            self.cached_queries[params + str(parse_results)] = res
            return self.cached_queries[params + str(parse_results)]
def cvify_records(recids, of, req=None, so='d'):
    """
       Write a CV for records RECIDS in the format OF in language LN.
       REQ is the Apache/mod_python request object.
    """
    # intbitsets don't support indexing, so we need a list from our hitset first
    recids = [hit for hit in recids]
    if so == 'd':
        recids.reverse()
    if of in ('hcv', 'htcv'):
        format_records(recids, of=of,
                       record_prefix=lambda count: '%d) ' % (count+1),
                       req=req)

    elif of == 'tlcv':
        HEADER = r'''
\documentclass{article}
%%To use pdflatex, uncomment these lines, as well as the \href lines
%%in each entry
%%\usepackage[pdftex,
%%       colorlinks=true,
%%       urlcolor=blue,       %% \href{...}{...} external (URL)
%%       filecolor=green,     %% \href{...} local file
%%       linkcolor=red,       %% \ref{...} and \pageref{...}
%%       pdftitle={Papers by AUTHOR},
%%       pdfauthor={Your Name},
%%       pdfsubject={Just a test},
%%       pdfkeywords={test testing testable},
%%       pagebackref,
%%       pdfpagemode=None,
%%        bookmarksopen=true]{hyperref}
%%usepackage{arial}
%%\renewcommand{\familydefault}{\sfdefault} %% San serif
\renewcommand{\labelenumii}{\arabic{enumi}.\arabic{enumii}}

\pagestyle{empty}
\oddsidemargin 0.0in
\textwidth 6.5in
\topmargin -0.75in
\textheight 9.5in

\begin{document}
\title{Papers by AUTHOR}
\author{}
\date{}
\maketitle
\begin{enumerate}

%%%%   LIST OF PAPERS
%%%%   Please comment out anything between here and the
%%%%   first \item
%%%%   Please send any updates or corrections to the list to
%%%%   %(email)s
''' % { 'email' : CFG_SITE_SUPPORT_EMAIL, }
        FOOTER = r'''
\end{enumerate}
\end{document}
'''
        format_records(recids, of=of,
                       prologue=HEADER,
                       epilogue=FOOTER,
                       req=req)

    return ''
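# Hypothetical wrapper around cvify_records (not from the original module);
# it assumes the Invenio search engine is importable and that `req` is the
# current Apache/mod_python request object. The query is illustrative only.
from invenio.search_engine import perform_request_search

def write_latex_cv(req, author):
    """Write a LaTeX CV skeleton for all records by `author` to `req`."""
    recids = perform_request_search(p='exactauthor:"%s"' % author)
    cvify_records(recids, of='tlcv', req=req)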