Example #1
    def _editions_view(self, seeds, **kw):
        reverse = str(kw.pop("reverse", "")).lower()
        if 'sort' in kw and reverse == "true":
            # sort=\field is couchdb-lucene's syntax for ORDER BY field DESC
            kw['sort'] = '\\' + kw['sort']
        view_url = config.get("lists", {}).get("editions_view")
        if not view_url:
            return {}

        def escape(value):
            special_chars = '+-&|!(){}[]^"~*?:\\'
            pattern = "([%s])" % re.escape(special_chars)
            
            quote = '"'
            return quote + web.re_compile(pattern).sub(r'\\\1', value) + quote
        
        q = " OR ".join("seed:" + escape(seed.encode('utf-8')) for seed in seeds)
        url = view_url + "?" + urllib.urlencode(dict(kw, q=q))
        
        stats.begin("couchdb", url=url)
        try:
            json = urllib2.urlopen(url).read()
        finally:
            stats.end()
        return simplejson.loads(json)
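Note: the escape helper above backslash-escapes Lucene's query metacharacters and quotes the whole seed. A standalone sketch of the same transformation, using the stdlib re module in place of web.re_compile (web.py's re_compile is, to our understanding, a caching wrapper around re.compile):

import re

# Characters with special meaning in Lucene query syntax.
SPECIAL_CHARS = '+-&|!(){}[]^"~*?:\\'
ESCAPE_PATTERN = re.compile("([%s])" % re.escape(SPECIAL_CHARS))

def lucene_escape(value):
    # Backslash-escape each metacharacter, then quote the whole value
    # so couchdb-lucene matches the seed literally.
    return '"' + ESCAPE_PATTERN.sub(r'\\\1', value) + '"'

print(lucene_escape('/people/mary'))  # -> "/people/mary"
print(lucene_escape('a:b (c)'))       # -> "a\:b \(c\)"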
Example #2
def works_by_author(akey, sort='editions', page=1, rows=100):
    q = 'author_key:' + akey
    offset = rows * (page - 1)
    fields = ['key', 'author_name', 'author_key', 'title', 'subtitle',
        'edition_count', 'ia', 'cover_edition_key', 'has_fulltext',
        'first_publish_year', 'public_scan_b', 'lending_edition_s',
        'overdrive_s', 'ia_collection_s']
    fl = ','.join(fields)
    solr_select = solr_select_url + "?q.op=AND&q=%s&fq=&start=%d&rows=%d&fl=%s&wt=json" % (q, offset, rows, fl)
    facet_fields = ["author_facet", "language", "publish_year", "publisher_facet", "subject_facet", "person_facet", "place_facet", "time_facet"]
    if sort == 'editions':
        solr_select += '&sort=edition_count+desc'
    elif sort.startswith('old'):
        solr_select += '&sort=first_publish_year+asc'
    elif sort.startswith('new'):
        solr_select += '&sort=first_publish_year+desc'
    elif sort.startswith('title'):
        solr_select += '&sort=title+asc'
    solr_select += "&facet=true&facet.mincount=1&f.author_facet.facet.sort=count&f.publish_year.facet.limit=-1&facet.limit=25&" + '&'.join("facet.field=" + f for f in facet_fields)
    stats.begin("solr", url=solr_select)
    reply = json.load(urllib.urlopen(solr_select))
    stats.end()
    facets = reply['facet_counts']['facet_fields']
    works = [work_object(w) for w in reply['response']['docs']]

    def get_facet(f, limit=None):
        return list(web.group(facets[f][:limit * 2] if limit else facets[f], 2))

    return web.storage(
        num_found = int(reply['response']['numFound']),
        works = works,
        years = [(int(k), v) for k, v in get_facet('publish_year')],
        get_facet = get_facet,
        sort = sort,
    )
Example #3
 def get_results(q, offset=0, limit=100):
     q = escape_bracket(q)
     solr_select = solr_edition_select_url + "?q.op=AND&q=%s&fq=&start=%d&rows=%d&fl=*&qt=standard&wt=json" % (web.urlquote(q), offset, limit)
     stats.begin("solr", url=solr_select)
     json_data = urllib.urlopen(solr_select).read()
     stats.end()
     return json.loads(json_data)
Example #4
def work_search(query, limit=20, offset=0, **kw):
    """Search for works."""

    kw.setdefault("doc_wrapper", work_wrapper)
    fields = [
        "key", 
        "author_name", 
        "author_key", 
        "title",
        "edition_count",
        "ia",
        "cover_edition_key",
        "has_fulltext",
        "subject",
        "ia_collection_s",
        "public_scan_b",
        "overdrive_s",
        "lending_edition_s",
    ]
    kw.setdefault("fields", fields)

    query = process_work_query(query)
    solr = get_works_solr()
    
    stats.begin("solr", query=query, start=offset, rows=limit, kw=kw)
    try:
        result = solr.select(query, start=offset, rows=limit, **kw)
    finally:
        stats.end()
    
    return result
Example #5
 def get_couchdb_docs(self, db, keys):
     try:
         stats.begin(name="_all_docs", keys=keys, include_docs=True)
         docs = dict((row.id, row.doc) for row in db.view("_all_docs", keys=keys, include_docs=True))
     finally:
         stats.end()
     return docs
Example #6
def _old_get_meta_xml(itemid):
    """Returns the contents of meta_xml as JSON.
    """
    itemid = web.safestr(itemid.strip())
    url = 'http://www.archive.org/download/%s/%s_meta.xml' % (itemid, itemid)
    try:
        stats.begin('archive.org', url=url)
        metaxml = urllib2.urlopen(url).read()
        stats.end()
    except IOError:
        logger.error("Failed to download _meta.xml for %s", itemid, exc_info=True)
        stats.end()
        return web.storage()

    # archive.org returns HTML on internal errors,
    # so check that the response is valid XML before trying to parse it.
    if not metaxml.strip().startswith("<?xml"):
        return web.storage()

    try:
        defaults = {"collection": [], "external-identifier": []}
        return web.storage(xml2dict(metaxml, **defaults))
    except Exception as e:
        logger.error("Failed to parse metaxml for %s", itemid, exc_info=True)
        return web.storage()
Example #7
    def get_ia_meta_fields(self):
        # Check for cached value
        # $$$ we haven't assigned _ia_meta_fields the first time around but there's apparently
        #     some magic that lets us check this way (and breaks using hasattr to check if defined)
        if self._ia_meta_fields:
            return self._ia_meta_fields
            
        if not self.get('ocaid', None):
            return {}
        ia = self.ocaid
        url = 'http://www.archive.org/download/%s/%s_meta.xml' % (ia, ia)
        reply = dict([ (set_name, set()) for set_name in ia_meta_sets ]) # create empty sets
        try:
            stats.begin("archive.org", url=url)
            f = urllib2.urlopen(url)
            stats.end()
        except:
            stats.end()
            return reply
        for line in f:
            m = re_meta_field.search(line)
            if not m:
                continue
            k = m.group(1).lower()
            v = m.group(2)
            if k == 'collection':
                reply[k].add(v.lower())
            elif k in ia_meta_sets:
                reply[k].add(v)
            else:
                if k in ia_meta_fields:
                    reply[k] = v

        self._ia_meta_fields = reply
        return self._ia_meta_fields
Example #8
 def _couchdb_view(self, db, viewname, **kw):
     stats.begin("couchdb", db=db.name, view=viewname, kw=kw)
     try:
         result = db.view(viewname, **kw)
     finally:
         stats.end()
     return result
Example #9
    def get_ia_meta_fields(self):
        if not self.get('ocaid', None):
            return {}
        ia = self.ocaid
        url = 'http://www.archive.org/download/%s/%s_meta.xml' % (ia, ia)
        reply = { 'collection': set() }
        try:
            stats.begin("archive.org", url=url)
            f = urllib2.urlopen(url)
            stats.end()
        except:
            stats.end()
            return reply
        for line in f:
            m = re_meta_field.search(line)
            if not m:
                continue
            k = m.group(1).lower()
            v = m.group(2)
            if k == 'collection':
                reply[k].add(v.lower())
            else:
                assert k == 'contributor'
                reply[k] = v

        return reply
Example #10
def get_meta_xml(itemid):
    """Returns the contents of meta_xml as JSON.
    """
    itemid = itemid.strip()
    
    url = 'http://www.archive.org/download/%s/%s_meta.xml' % (itemid, itemid)
    try:
        stats.begin("archive.org", url=url)
        metaxml = urllib2.urlopen(url).read()
        stats.end()
    except IOError:
        stats.end()
        return web.storage()
        
    # archive.org returns HTML on internal errors,
    # so check that the response is valid XML before trying to parse it.
    if not metaxml.strip().startswith("<?xml"):
        return web.storage()
    
    try:
        defaults = {"collection": [], "external-identifier": []}
        return web.storage(xml2dict(metaxml, **defaults))
    except Exception, e:
        print >> web.debug, "Failed to parse metaxml for %s: %s" % (itemid, str(e)) 
        return web.storage()
Example #11
 def add(self, key, value, expires=0):
     key = web.safestr(key)
     value = simplejson.dumps(value)
     stats.begin("memcache.add", key=key)
     value = self.memcache.add(key, value, expires)
     stats.end()
     return value
Example #12
 def get_results(q, offset=0, limit=100, snippets=3, fragsize=200, hl_phrase=False):
     m = re_bad_fields.match(q)
     if m:
         return { 'error': m.group(1) + ' search not supported' }
     q = escape_q(q)
     solr_params = [
         ('fl', 'ia,body_length,page_count'),
         ('hl', 'true'),
         ('hl.fl', 'body'),
         ('hl.snippets', snippets),
         ('hl.mergeContiguous', 'true'),
         ('hl.usePhraseHighlighter', 'true' if hl_phrase else 'false'),
         ('hl.simple.pre', '{{{'),
         ('hl.simple.post', '}}}'),
         ('hl.fragsize', fragsize),
         ('q.op', 'AND'),
         ('q', web.urlquote(q)),
         ('start', offset),
         ('rows', limit),
         ('qf', 'body'),
         ('qt', 'standard'),
         ('hl.maxAnalyzedChars', '-1'),
         ('wt', 'json'),
     ]
     solr_select = solr_select_url + '?' + '&'.join("%s=%s" % (k, unicode(v)) for k, v in solr_params)
     stats.begin("solr", url=solr_select)
     json_data = urllib.urlopen(solr_select).read()
     stats.end()
     try:
         return simplejson.loads(json_data)
     except:
         m = re_query_parser_error.search(json_data)
         return { 'error': web.htmlunquote(m.group(1)) }
Example #13
    def _get_solr_data(self):
        if config.get("single_core_solr"):
            key = self.key
        else:
            key = self.get_olid()

        fields = [
            "cover_edition_key", "cover_id", "edition_key", "first_publish_year",
            "has_fulltext", "lending_edition", "checked_out", "public_scan_b", "ia"]
        
        solr = get_works_solr()
        stats.begin("solr", query={"key": key}, fields=fields)
        try:
            d = solr.select({"key": key}, fields=fields)
        finally:
            stats.end()
            
        if d.num_found > 0:
            w = d.docs[0]
        else:
            w = None
                
        # Replace _solr_data property with the attribute
        self.__dict__['_solr_data'] = w
        return w
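The closing self.__dict__['_solr_data'] = w line is a caching idiom: an instance attribute of the same name shadows the class-level accessor, so later reads never hit Solr. This only works when _solr_data is exposed through a non-data descriptor; a plain property would not be shadowed. A minimal sketch of such a descriptor (names here are illustrative; Python 3.8's functools.cached_property packages the same trick):

class cached(object):
    """Non-data descriptor: the first access computes the value and
    stores it in the instance __dict__, shadowing this descriptor on
    every later access."""
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self
        value = obj.__dict__[self.name] = self.func(obj)
        return value

class Work(object):
    @cached
    def _solr_data(self):
        print("querying solr ...")  # runs only on the first access
        return {"edition_count": 42}

w = Work()
w._solr_data  # queries solr and stores the result
w._solr_data  # served straight from the instance attribute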
Example #14
    def get_many(self, sitename, data):
        keys = simplejson.loads(data['keys'])

        stats.begin("memcache.get_multi")
        result = self.memcache.get_multi(keys)
        stats.end(found=len(result))

        keys2 = [k for k in keys if k not in result]
        if keys2:
            data['keys'] = simplejson.dumps(keys2)
            result2 = ConnectionMiddleware.get_many(self, sitename, data)
            result2 = simplejson.loads(result2)

            # Memcache expects a (key, json) mapping, but we have (key, doc).
            # Convert the docs to JSON before passing them to memcache.
            self.mc_set_multi(dict((key, simplejson.dumps(doc)) for key, doc in result2.items()))

            result.update(result2)

        #@@ too many JSON conversions
        for k in result:
            if isinstance(result[k], six.string_types):
                result[k] = simplejson.loads(result[k])

        return simplejson.dumps(result)
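Example #14 is a read-through multi-get cache: ask memcache for every key in one round trip, fetch only the misses from the upstream connection, then backfill memcache so the next request hits. The shape of the pattern, with hypothetical helper names (get_multi/set_multi match python-memcached's client API):

def read_through_get_many(cache, fetch_upstream, keys):
    found = cache.get_multi(keys)                  # one round trip for all hits
    missing = [k for k in keys if k not in found]
    if missing:
        fetched = fetch_upstream(missing)          # dict of key -> doc
        cache.set_multi(fetched)                   # backfill for next time
        found.update(fetched)
    return found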
Example #15
def read_from_archive(ia):
    meta_xml = 'http://archive.org/download/' + ia + '/' + ia + '_meta.xml'
    stats.begin("archive.org", url=meta_xml)
    xml_data = urllib.urlopen(meta_xml)
    item = {}
    try:
        tree = etree.parse(xml_data)
    except etree.XMLSyntaxError:
        return {}
    finally:
        stats.end()
    root = tree.getroot()

    fields = ['title', 'creator', 'publisher', 'date', 'language']

    for k in 'title', 'date', 'publisher':
        v = root.find(k)
        if v is not None:
            item[k] = v.text

    for k in 'creator', 'language', 'collection':
        v = root.findall(k)
        if len(v):
            item[k] = [i.text for i in v if i.text]
    return item
Example #16
File: code.py Project: strogo/openlibrary
 def get_results(q, offset=0, limit=100):
     valid_fields = ['key', 'name', 'alternate_names', 'birth_date', 'death_date', 'date', 'work_count']
     q = escape_colon(escape_bracket(q), valid_fields)
     solr_select = solr_author_select_url + "?q.op=AND&q=%s&fq=&start=%d&rows=%d&fl=*&qt=standard&wt=json" % (web.urlquote(q), offset, limit)
     solr_select += '&sort=work_count+desc'
     stats.begin("solr", url=solr_select)
     json_data = urllib.urlopen(solr_select).read()
     stats.end()
     return json.loads(json_data)
Example #17
File: code.py Project: strogo/openlibrary
 def get_results(q, offset=0, limit=100):
     valid_fields = ['key', 'name', 'type', 'count']
     q = escape_colon(escape_bracket(q), valid_fields)
     solr_select = solr_subject_select_url + "?q.op=AND&q=%s&fq=&start=%d&rows=%d&fl=name,type,count&qt=standard&wt=json" % (web.urlquote(q), offset, limit)
     solr_select += '&sort=count+desc'
     stats.begin("solr", url=solr_select)
     json_data = urllib.urlopen(solr_select).read()
     stats.end()
     return json.loads(json_data)
Example #18
def simple_search(q, offset=0, rows=20, sort=None):
    solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&fq=&start=%d&rows=%d&fl=*%%2Cscore&qt=standard&wt=json" % (web.urlquote(q), offset, rows)
    if sort:
        solr_select += "&sort=" + web.urlquote(sort)

    stats.begin("solr", url=solr_select)
    json_data = urllib.urlopen(solr_select)
    stats.end()
    return json.load(json_data)
Example #19
    def memcache_set(self, args, kw, value, time):
        """Adds value and time to memcache. Key is computed from the arguments.
        """
        key = self.compute_key(args, kw)
        json = self.json_encode([value, time])

        stats.begin("memcache.set", key=key)
        self.memcache.set(key, json)
        stats.end()
Example #20
 def _couchdb_view(self, db, viewname, **kw):
     stats.begin("couchdb", db=db.name, view=viewname, kw=kw)
     try:
         result = db.view(viewname, **kw)
         
         # force fetching the results
         result.rows
     finally:
         stats.end()
     return result
Example #21
def execute_solr_query(url):
    stats.begin("solr", url=url)
    try:
        solr_result = urllib2.urlopen(url, timeout=3)
    except Exception as e:
        logger.exception("Failed solr query")
        return None
    finally:
        stats.end()
    return solr_result
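Examples like this one wrap the request in try/finally so stats.end() runs even when the call raises. A hypothetical context-manager wrapper (not part of openlibrary's stats module) would centralize that pairing; snippets that pass data to stats.end(), such as stats.end(hit=...), would still need the explicit form:

from contextlib import contextmanager

@contextmanager
def measured(section, **labels):
    # stats.begin/stats.end behave as in the surrounding examples.
    stats.begin(section, **labels)
    try:
        yield
    finally:
        stats.end()

# usage:
# with measured("solr", url=url):
#     solr_result = urllib2.urlopen(url, timeout=3)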
Example #22
def top_books_from_author(akey, rows=5, offset=0):
    q = 'author_key:(' + akey + ')'
    solr_select = solr_select_url + "?q=%s&start=%d&rows=%d&fl=key,title,edition_count,first_publish_year&wt=json&sort=edition_count+desc" % (q, offset, rows)
    stats.begin("solr", url=solr_select)
    response = json.load(urllib.urlopen(solr_select))['response']
    stats.end()
    return {
        'books': [web.storage(doc) for doc in response['docs']],
        'total': response['numFound'],
    }
Example #23
def top_books_from_author(akey, rows=5, offset=0):
    q = 'author_key:(' + akey + ')'
    solr_select = solr_select_url + "?q=%s&start=%d&rows=%d&fl=key,title,edition_count,first_publish_year&wt=json&sort=edition_count+desc" % (
        q, offset, rows)
    stats.begin("solr", url=solr_select)
    response = json.load(urllib.urlopen(solr_select))['response']
    stats.end()
    return {
        'books': [web.storage(doc) for doc in response['docs']],
        'total': response['numFound'],
    }
Example #24
def run_solr_query(param=None, rows=100, page=1, sort=None, spellcheck_count=None):
    # called by do_search
    if param is None:
        # avoid a shared mutable default; this function mutates param below
        param = {}
    if spellcheck_count is None:
        spellcheck_count = default_spellcheck_count
    offset = rows * (page - 1)

    (q_list, use_dismax) = build_q_list(param)

    fields = ['key', 'author_name', 'author_key', 'title', 'subtitle', 'edition_count', 'ia', 'has_fulltext', 'first_publish_year', 'cover_edition_key', 'public_scan_b', 'lending_edition_s', 'overdrive_s', 'ia_collection_s']
    fl = ','.join(fields)
    if use_dismax:
        q = web.urlquote(' '.join(q_list))
        solr_select = solr_select_url + "?defType=dismax&q.op=AND&q=%s&qf=text+title^5+author_name^5&bf=sqrt(edition_count)^10&start=%d&rows=%d&fl=%s&wt=standard" % (q, offset, rows, fl)
    else:
        q = web.urlquote(' '.join(q_list + ['_val_:"sqrt(edition_count)"^10']))
        solr_select = solr_select_url + "?q.op=AND&q=%s&start=%d&rows=%d&fl=%s&wt=standard" % (q, offset, rows, fl)
    solr_select += '&spellcheck=true&spellcheck.count=%d' % spellcheck_count
    solr_select += "&facet=true&" + '&'.join("facet.field=" + f for f in facet_fields)

    if 'public_scan' in param:
        v = param.pop('public_scan').lower()
        if v in ('true', 'false'):
            if v == 'false':
                # also constrain on print disabled since the index may not be in sync
                param.setdefault('print_disabled', 'false')
            solr_select += '&fq=public_scan_b:%s' % v

    if 'print_disabled' in param:
        v = param.pop('print_disabled').lower()
        if v in ('true', 'false'):
            solr_select += '&fq=%ssubject_key:protected_daisy' % ('-' if v == 'false' else '')

    k = 'has_fulltext'
    if k in param:
        v = param[k].lower()
        if v not in ('true', 'false'):
            del param[k]
        else:
            param[k] = v  # was `param[k] == v`, a comparison with no effect
            solr_select += '&fq=%s:%s' % (k, v)

    for k in facet_list_fields:
        if k == 'author_facet':
            k = 'author_key'
        if k not in param:
            continue
        v = param[k]
        solr_select += ''.join('&fq=%s:"%s"' % (k, url_quote(l)) for l in v if l)
    if sort:
        solr_select += "&sort=" + url_quote(sort)

    stats.begin("solr", url=solr_select)
    reply = urllib.urlopen(solr_select).read()
    stats.end()
    return (reply, solr_select, q_list)
Example #25
def _get_metadata(itemid):
    """Returns metadata by querying the archive.org metadata API.
    """
    url = "http://www.archive.org/metadata/%s" % itemid
    try:
        stats.begin("archive.org", url=url)
        text = urllib2.urlopen(url).read()
        stats.end()
        return simplejson.loads(text)
    except (IOError, ValueError):
        return None
Example #26
 def memcache_get(self, args, kw):
     """Reads the value from memcache. Key is computed from the arguments.
     
     Returns (value, time) when the value is available, None otherwise.
     """
     key = self.compute_key(args, kw)
     stats.begin("memcache.get", key=key)
     json = self.memcache.get(key)
     stats.end(hit=bool(json))
     
     return json and self.json_decode(json)
Example #27
File: ia.py Project: rexzing/openlibrary
def get_item_manifest(item_id, item_server, item_path):
    url = 'https://%s/BookReader/BookReaderJSON.php' % item_server
    url += '?itemPath=%s&itemId=%s&server=%s' % (item_path, item_id, item_server)
    try:
        stats.begin('archive.org', url=url)
        manifest = requests.get(url)
        stats.end()
        return manifest.json()
    except IOError:
        stats.end()
        return {}
Example #28
def get_item_manifest(item_id, item_server, item_path):
    url = 'https://%s/BookReader/BookReaderJSON.php' % item_server
    url += "?itemPath=%s&itemId=%s&server=%s" % (item_path, item_id, item_server)
    try:
        stats.begin("archive.org", url=url)
        manifest_json = urllib2.urlopen(url).read()
        stats.end()
        return simplejson.loads(manifest_json)
    except IOError:
        stats.end()
        return {}
Example #29
File: code.py Project: ahvigil/openlibrary
def inside_solr_select(params):
    params.setdefault("wt", "json")
    #solr_select = solr_select_url + '?' + '&'.join("%s=%s" % (k, unicode(v)) for k, v in params)
    solr_select = solr_select_url + "?" + urllib.urlencode(params)
    stats.begin("solr", url=solr_select)

    try:
        json_data = urlopen(solr_select).read()
    except IOError, e:
        logger.error("Unable to query search inside solr", exc_info=True)
        return {"error": web.htmlquote(str(e))}
Example #30
    def memcache_get(self, args, kw):
        """Reads the value from memcache. Key is computed from the arguments.

        Returns (value, time) when the value is available, None otherwise.
        """
        key = self.compute_key(args, kw)
        stats.begin("memcache.get", key=key)
        json = self.memcache.get(key)
        stats.end(hit=bool(json))

        return json and self.json_decode(json)
Example #31
def get_item_json(itemid):
    itemid = web.safestr(itemid.strip())
    url = 'http://archive.org/metadata/%s' % itemid
    try:
        stats.begin('archive.org', url=url)
        metadata_json = urllib2.urlopen(url).read()
        stats.end()
        return simplejson.loads(metadata_json)
    except IOError:
        stats.end()
        return {}
Example #32
    def _couchdb_view(self, db, viewname, **kw):
        stats.begin("couchdb", db=db.name, view=viewname, kw=kw)
        try:
            result = db.view(viewname, **kw)

            # force fetching the results
            result.rows
        finally:
            stats.end()

        return result
Example #33
def inside_solr_select(params):
    params.setdefault("wt", "json")
    #solr_select = solr_select_url + '?' + '&'.join("%s=%s" % (k, unicode(v)) for k, v in params)
    solr_select = solr_select_url + "?" + urllib.urlencode(params)
    stats.begin("solr", url=solr_select)

    try:
        json_data = urlopen(solr_select).read()
    except IOError, e:
        logger.error("Unable to query search inside solr", exc_info=True)
        return {"error": web.htmlquote(str(e))}
Example #34
 def get_results(q, offset=0, limit=100, snippets=3, fragsize=200):
     q = escape_bracket(q)
     solr_select = solr_select_url + "?fl=ia,body_length,page_count&hl=true&hl.fl=body&hl.snippets=%d&hl.mergeContiguous=true&hl.usePhraseHighlighter=false&hl.simple.pre={{{&hl.simple.post=}}}&hl.fragsize=%d&q.op=AND&q=%s&start=%d&rows=%d&qf=body&qt=standard&hl.maxAnalyzedChars=1000000&wt=json" % (snippets, fragsize, web.urlquote(q), offset, limit)
     stats.begin("solr", url=solr_select)
     json_data = urllib.urlopen(solr_select).read()
     stats.end()
     try:
         return simplejson.loads(json_data)
     except:
         m = re_query_parser_error.search(json_data)
         return { 'error': web.htmlunquote(m.group(1)) }
Example #35
def search_inside_result_count(q):
    q = escape_q(q)
    solr_select = solr_select_url + "?fl=ia&q.op=AND&wt=json&q=" + web.urlquote(q)
    stats.begin("solr", url=solr_select)
    json_data = urllib.urlopen(solr_select).read()
    stats.end()
    try:
        results = simplejson.loads(json_data)
    except:
        return None
    return results['response']['numFound']
Example #36
def get_item_manifest(item_id, item_server, item_path):
    url = 'https://%s/BookReader/BookReaderJSON.php' % item_server
    url += "?itemPath=%s&itemId=%s&server=%s" % (item_path, item_id, item_server)
    try:
        stats.begin("archive.org", url=url)
        manifest_json = urllib2.urlopen(url).read()
        stats.end()
        return simplejson.loads(manifest_json)
    except IOError:
        stats.end()
        return {}
Example #37
def get_item_json(itemid):
    itemid = web.safestr(itemid.strip())
    url = 'http://archive.org/metadata/%s' % itemid
    try:
        stats.begin('archive.org', url=url)
        metadata_json = urllib2.urlopen(url).read()
        stats.end()
        return simplejson.loads(metadata_json)
    except IOError:
        stats.end()
        return {}
Example #38
def _get_metadata(itemid):
    """Returns metadata by querying the archive.org metadata API.
    """
    url = "http://www.archive.org/metadata/%s" % itemid
    try:
        stats.begin("archive.org", url=url)
        text = urllib2.urlopen(url).read()
        stats.end()
        return simplejson.loads(text)
    except (IOError, ValueError):
        return None
Example #39
File: code.py Project: iefbr14/openlibrary
def sorted_work_editions(wkey, json_data=None):
    q = 'key:' + wkey
    if not json_data: # for testing
        solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&rows=10&fl=edition_key&qt=standard&wt=json" % q
        stats.begin("solr", url=solr_select)
        json_data = urllib.urlopen(solr_select).read()
        stats.end()
    reply = json.loads(json_data)

    if reply['response']['numFound'] == 0:
        return []
    return reply["response"]['docs'][0].get('edition_key', [])
Example #40
def search_inside_result_count(q):
    q = escape_q(q)
    solr_select = solr_select_url + "?fl=ia&q.op=AND&wt=json&q=" + web.urlquote(
        q)
    stats.begin("solr", url=solr_select)
    json_data = urllib.urlopen(solr_select).read()
    stats.end()
    try:
        results = simplejson.loads(json_data)
    except:
        return None
    return results['response']['numFound']
Example #41
File: ia.py Project: rexzing/openlibrary
def _get_metadata(itemid):
    """Returns metadata by querying the archive.org metadata API.
    """
    itemid = web.safestr(itemid.strip())
    url = '%s/metadata/%s' % (IA_BASE_URL, itemid)
    try:
        stats.begin('archive.org', url=url)
        metadata = requests.get(url)
        stats.end()
        return metadata.json()
    except IOError:
        stats.end()
        return {}
Example #42
 def request(self, sitename, path, method='GET', data=None):
     path = "/" + sitename + path
     web.ctx.infobase_auth_token = self.get_auth_token()
     try:
         stats.begin("infobase", path=path, method=method, data=data)
         out = server.request(path, method, data)
         stats.end()
         if 'infobase_auth_token' in web.ctx:
             self.set_auth_token(web.ctx.infobase_auth_token)
     except common.InfobaseException as e:
         stats.end(error=True)
         self.handle_error(e.status, str(e))
     return out
Example #43
def execute_solr_query(
        solr_path: str,
        params: Union[dict, list[tuple[str, Any]]]) -> Optional[Response]:
    stats.begin("solr", url=f'{solr_path}?{urlencode(params)}')
    try:
        response = requests.get(solr_path, params=params, timeout=10)
        response.raise_for_status()
    except requests.HTTPError:
        logger.exception("Failed solr query")
        return None
    finally:
        stats.end()
    return response
Example #44
def get_work_iaids(wkey):
    #wid = wkey.split('/')[2]
    solr_select_url = get_works_solr_select_url()
    filter = 'ia'
    q = 'key:' + wkey
    stats.begin('solr', url=wkey)
    solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&rows=10&fl=%s&qt=standard&wt=json&fq=type:work" % (q, filter)
    json_data = urllib.urlopen(solr_select).read()
    stats.end()
    print json_data
    reply = simplejson.loads(json_data)
    if reply['response']['numFound'] == 0:
        return []
    return reply["response"]['docs'][0].get(filter, [])
Example #45
def get_work_iaids(wkey):
    #wid = wkey.split('/')[2]
    solr_select_url = get_solr_select_url()
    filter = 'ia'
    q = 'key:' + wkey
    stats.begin('solr', url=wkey)
    solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&rows=10&fl=%s&qt=standard&wt=json&fq=type:work" % (
        q, filter)
    reply = requests.get(solr_select).json()
    stats.end()
    print(reply)
    if reply['response']['numFound'] == 0:
        return []
    return reply["response"]['docs'][0].get(filter, [])
Example #46
File: code.py Project: xmarvy/openlibrary
def execute_solr_query(url):
    """
    Returns a requests.Response or None
    """
    stats.begin("solr", url=url)
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.HTTPError:
        logger.exception("Failed solr query")
        return None
    finally:
        stats.end()
    return response
Example #47
File: models.py Project: iwtga/openlibrary
    def _solr_data(self):
        fields = [
            "cover_edition_key", "cover_id", "edition_key", "first_publish_year",
            "has_fulltext", "lending_edition_s", "public_scan_b", "ia"]

        solr = get_solr()
        stats.begin("solr", get=self.key, fields=fields)
        try:
            return solr.get(self.key, fields=fields)
        except Exception as e:
            logging.getLogger("openlibrary").exception("Failed to get solr data")
            return None
        finally:
            stats.end()
Example #48
File: admin.py Project: yzou/openlibrary
def _get_count_docs(ndays):
    """Returns the count docs from admin couchdb database.
    
    This function is memoized to avoid accessing couchdb for every request.
    """
    admin_db = couchdb.Database(config.admin.counts_db)
    end      = datetime.datetime.now().strftime("counts-%Y-%m-%d")
    start    = (datetime.datetime.now() - datetime.timedelta(days = ndays)).strftime("counts-%Y-%m-%d")
        
    stats.begin("couchdb")
    docs = [x.doc for x in admin_db.view("_all_docs",
                                         startkey_docid = start,
                                         endkey_docid   = end,
                                         include_docs = True)]
    stats.end()
    return docs
Example #49
def get_metadata(itemid):
    itemid = web.safestr(itemid.strip())
    url = 'http://archive.org/metadata/%s' % itemid
    try:
        stats.begin("archive.org", url=url)
        metadata_json = urllib2.urlopen(url).read()
        stats.end()
        d = simplejson.loads(metadata_json)
        metadata = process_metadata_dict(d.get("metadata", {}))

        # if any of the files is access-restricted, treat the whole item as access-restricted.
        metadata['access-restricted'] = any(
            f.get("private") == "true" for f in d['files'])
        return metadata
    except IOError:
        stats.end()
        return {}
Example #50
def find_ebook_count(field, key):
    q = '%s_key:%s+AND+(overdrive_s:*+OR+ia:*)' % (
        field, re_chars.sub(r'\\\1', key).encode('utf-8'))

    root_url = solr_select_url + '?wt=json&indent=on&rows=%d&start=%d&q.op=AND&q=%s&fl=edition_key'
    rows = 1000

    ebook_count = 0
    start = 0
    solr_url = root_url % (rows, start, q)

    stats.begin("solr", url=solr_url)
    response = json.load(urllib.urlopen(solr_url))['response']
    stats.end()

    num_found = response['numFound']
    years = defaultdict(int)
    while start < num_found:
        if start:
            solr_url = root_url % (rows, start, q)
            stats.begin("solr", url=solr_url)
            response = json.load(urllib.urlopen(solr_url))['response']
            stats.end()
        for doc in response['docs']:
            for k in doc['edition_key']:
                e = web.ctx.site.get('/books/' + k)
                ia = set(i[3:] for i in e.get('source_records', [])
                         if i.startswith('ia:'))
                if e.get('ocaid'):
                    ia.add(e['ocaid'])
                pub_date = e.get('publish_date')
                pub_year = -1
                if pub_date:
                    m = re_year.search(pub_date)
                    if m:
                        pub_year = int(m.group(1))
                ebook_count = len(ia)
                if 'overdrive' in e.get('identifiers', {}):
                    ebook_count += len(e['identifiers']['overdrive'])
                if ebook_count:
                    years[pub_year] += ebook_count
        start += rows

    return dict(years)
Example #51
def works_by_author(akey, sort='editions', page=1, rows=100):
    # called by merge_author_works
    q = 'author_key:' + akey
    offset = rows * (page - 1)
    fields = [
        'key', 'author_name', 'author_key', 'title', 'subtitle',
        'edition_count', 'ia', 'cover_edition_key', 'has_fulltext',
        'first_publish_year', 'public_scan_b', 'lending_edition_s',
        'overdrive_s', 'ia_collection_s'
    ]
    fl = ','.join(fields)
    solr_select = solr_select_url + "?q.op=AND&q=%s&fq=&start=%d&rows=%d&fl=%s&wt=json" % (
        q, offset, rows, fl)
    facet_fields = [
        "author_facet", "language", "publish_year", "publisher_facet",
        "subject_facet", "person_facet", "place_facet", "time_facet"
    ]
    if sort == 'editions':
        solr_select += '&sort=edition_count+desc'
    elif sort.startswith('old'):
        solr_select += '&sort=first_publish_year+asc'
    elif sort.startswith('new'):
        solr_select += '&sort=first_publish_year+desc'
    elif sort.startswith('title'):
        solr_select += '&sort=title+asc'
    solr_select += "&facet=true&facet.mincount=1&f.author_facet.facet.sort=count&f.publish_year.facet.limit=-1&facet.limit=25&" + '&'.join(
        "facet.field=" + f for f in facet_fields)
    stats.begin("solr", url=solr_select)
    reply = json.load(urllib.urlopen(solr_select))
    stats.end()
    facets = reply['facet_counts']['facet_fields']
    works = [work_object(w) for w in reply['response']['docs']]

    def get_facet(f, limit=None):
        return list(web.group(facets[f][:limit * 2] if limit else facets[f],
                              2))

    return web.storage(
        num_found=int(reply['response']['numFound']),
        works=works,
        years=[(int(k), v) for k, v in get_facet('publish_year')],
        get_facet=get_facet,
        sort=sort,
    )
Example #52
 def _get_solr_data(self):
     key = self.get_olid()
     fields = ["cover_edition_key", "cover_id", "edition_key", "first_publish_year"]
     
     solr = get_works_solr()
     stats.begin("solr", query={"key": key}, fields=fields)
     try:
         d = solr.select({"key": key}, fields=fields)
     finally:
         stats.end()
         
     if d.num_found > 0:
         w = d.docs[0]
     else:
         w = None
             
     # Replace _solr_data property with the attribute
     self.__dict__['_solr_data'] = w
     return w
Example #53
    def get(self, sitename, data):
        key = data.get('key')
        revision = data.get('revision')

        if revision is None:
            stats.begin("memcache.get", key=key)
            result = self.memcache.get(key)
            stats.end(hit=bool(result))

            return result or ConnectionMiddleware.get(self, sitename, data)
        else:
            # cache get requests with revisions for a minute
            mc_key = "%s@%d" % (key, revision)
            result = self.mc_get(mc_key)
            if result is None:
                result = ConnectionMiddleware.get(self, sitename, data)
                if result:
                    self.mc_set(mc_key, result, time=60)  # cache for a minute
            return result
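The revision branch above caches safely because a (key, revision) pair identifies an immutable document, so the entry can never go stale. Condensed into one hypothetical helper (mc_get/mc_set stand in for the middleware methods used above):

def get_cached_revision(site, key, revision, ttl=60):
    mc_key = "%s@%d" % (key, revision)
    doc = mc_get(mc_key)
    if doc is None:
        doc = site.get(key, revision)
        if doc:
            # a fixed revision never changes; the TTL only bounds memory use
            mc_set(mc_key, doc, time=ttl)
    return doc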
Example #54
def get_api_response(url, params=None):
    """
    Makes an API GET request to archive.org, collects stats
    Returns a JSON dict.
    :param str url:
    :param dict params: url parameters
    :rtype: dict
    """
    api_response = {}
    stats.begin('archive.org', url=url)
    try:
        r = requests.get(url, params=params)
        if r.status_code == requests.codes.ok:
            api_response = r.json()
        else:
            logger.info('%s response received from %s' % (r.status_code, url))
    except Exception as e:
        logger.exception('Exception occurred accessing %s.' % url)
    stats.end()
    return api_response
Example #55
def _get_blog_feeds():
    url = "http://blog.openlibrary.org/feed/"
    try:
        stats.begin("get_blog_feeds", url=url)
        tree = etree.parse(urllib.request.urlopen(url))
    except Exception:
        # Handle error gracefully.
        logging.getLogger("openlibrary").error("Failed to fetch blog feeds", exc_info=True)
        return []
    finally:
        stats.end()

    def parse_item(item):
        pubdate = datetime.datetime.strptime(item.find("pubDate").text, '%a, %d %b %Y %H:%M:%S +0000').isoformat()
        return dict(
            title=item.find("title").text,
            link=item.find("link").text,
            pubdate=pubdate
        )
    return [parse_item(item) for item in tree.findall("//item")]
Example #56
def get_metadata(itemid):
    itemid = web.safestr(itemid.strip())
    url = 'http://archive.org/metadata/%s' % itemid
    try:
        stats.begin("archive.org", url=url)
        metadata_json = urllib2.urlopen(url).read()
        stats.end()
        d = simplejson.loads(metadata_json)
        metadata = process_metadata_dict(d.get("metadata", {}))

        if metadata:
            # if any of the files is access-restricted, treat the whole item as access-restricted.
            files = d.get('files', [])
            metadata['access-restricted'] = any(f.get("private") == "true" for f in files)

            # remember the filenames to construct download links
            metadata['_filenames'] = [f['name'] for f in files]
        return metadata
    except IOError:
        stats.end()
        return {}
Example #57
def execute_ebook_count_query(q):
    root_url = solr_select_url + '?wt=json&indent=on&rows=%d&start=%d&q.op=AND&q=%s&fl=edition_key'
    rows = 1000

    ebook_count = 0
    start = 0
    solr_url = root_url % (rows, start, q)

    stats.begin("solr", url=solr_url)
    response = json.load(urllib.urlopen(solr_url))['response']
    stats.end()

    num_found = response['numFound']
    years = defaultdict(int)
    while start < num_found:
        if start:
            solr_url = root_url % (rows, start, q)
            stats.begin("solr", url=solr_url)
            response = json.load(urllib.urlopen(solr_url))['response']
            stats.end()
        for doc in response['docs']:
            for k in doc['edition_key']:
                e = web.ctx.site.get('/books/' + k)
                ia = set(i[3:] for i in e.get('source_records', [])
                         if i.startswith('ia:'))
                if e.get('ocaid'):
                    ia.add(e['ocaid'])
                pub_date = e.get('publish_date')
                pub_year = -1
                if pub_date:
                    m = re_year.search(pub_date)
                    if m:
                        pub_year = int(m.group(1))
                ebook_count = len(ia)
                if ebook_count:
                    years[pub_year] += ebook_count
        start += rows

    return dict(years)
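The loop above pages through Solr by re-issuing the query with a growing start offset until numFound rows have been consumed. The same pattern as a small generator (a hypothetical helper, assuming the wt=json response shape used throughout these examples):

def iter_solr_docs(url_template, q, rows=1000):
    # Yield every matching doc, one page of `rows` results per request.
    start = 0
    while True:
        solr_url = url_template % (rows, start, q)
        response = json.load(urllib.urlopen(solr_url))['response']
        for doc in response['docs']:
            yield doc
        start += rows
        if start >= response['numFound']:
            break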
Example #58
 def get_results(q,
                 offset=0,
                 limit=100,
                 snippets=3,
                 fragsize=200,
                 hl_phrase=False):
     m = re_bad_fields.match(q)
     if m:
         return {'error': m.group(1) + ' search not supported'}
     q = escape_q(q)
     solr_params = [
         ('fl', 'ia,body_length,page_count'),
         ('hl', 'true'),
         ('hl.fl', 'body'),
         ('hl.snippets', snippets),
         ('hl.mergeContiguous', 'true'),
         ('hl.usePhraseHighlighter', 'true' if hl_phrase else 'false'),
         ('hl.simple.pre', '{{{'),
         ('hl.simple.post', '}}}'),
         ('hl.fragsize', fragsize),
         ('q.op', 'AND'),
         ('q', web.urlquote(q)),
         ('start', offset),
         ('rows', limit),
         ('qf', 'body'),
         ('qt', 'standard'),
         ('hl.maxAnalyzedChars', '-1'),
         ('wt', 'json'),
     ]
     solr_select = solr_select_url + '?' + '&'.join(
         "%s=%s" % (k, unicode(v)) for k, v in solr_params)
     stats.begin("solr", url=solr_select)
     json_data = urllib.urlopen(solr_select).read()
     stats.end()
     try:
         return simplejson.loads(json_data)
     except:
         m = re_query_parser_error.search(json_data)
         return {'error': web.htmlunquote(m.group(1))}
Example #59
def work_search(query, limit=20, offset=0, **kw):
    """Search for works."""

    kw.setdefault("doc_wrapper", work_wrapper)
    fields = [
        "key",
        "author_name",
        "author_key",
        "title",
        "edition_count",
        "ia",
        "cover_edition_key",
        "has_fulltext",
        "subject",
        "ia_collection_s",
        "public_scan_b",
        "overdrive_s",
        "lending_edition_s",
        "lending_identifier_s",
    ]
    kw.setdefault("fields", fields)

    if config.get('single_core_solr'):
        kw.setdefault("fq", "type:work")

    query = process_work_query(query)
    solr = get_works_solr()

    stats.begin("solr", query=query, start=offset, rows=limit, kw=kw)
    try:
        result = solr.select(query, start=offset, rows=limit, **kw)
    except Exception as e:
        logging.getLogger("openlibrary").exception("Failed solr query")
        return None
    finally:
        stats.end()

    return result
Example #60
    def _get_solr_data(self):
        fields = [
            "cover_edition_key", "cover_id", "edition_key", "first_publish_year",
            "has_fulltext", "lending_edition_s", "checked_out", "public_scan_b", "ia"]

        solr = get_solr()
        stats.begin("solr", query={"key": self.key}, fields=fields)
        try:
            d = solr.select({"key": self.key}, fields=fields)
        except Exception as e:
            logging.getLogger("openlibrary").exception("Failed to get solr data")
            return None
        finally:
            stats.end()

        if d.num_found > 0:
            w = d.docs[0]
        else:
            w = None

        # Replace _solr_data property with the attribute
        self.__dict__['_solr_data'] = w
        return w