Ejemplo n.º 1
0
 def _load_solrdata(self):
     """Compute summary counts for this document.

     Returns a dict with 'ebook_count', 'edition_count', 'work_count' and
     'last_update' keys. An edition is counted directly; other types sum
     the counts over matching solr docs. Returns {} when no solr query
     term is available for this document.
     """
     if self.type == "edition":
         # A single edition of a single work; counts as an ebook only
         # when it has an archive.org identifier (ocaid).
         return {
             'ebook_count': int(bool(self.document.ocaid)),
             'edition_count': 1, 
             'work_count': 1,
             'last_update': self.document.last_modified
         }
     else:
         q = self.get_solr_query_term()
         if q:
             solr = get_works_solr()
             result = solr.select(q, fields=["edition_count", "ebook_count_i"])
             # NOTE(review): 'last_update_i' is not in the requested fields
             # list above, so this may always come back empty — confirm.
             last_update_i = [doc['last_update_i'] for doc in result.docs if 'last_update_i' in doc]
             if last_update_i:
                 # NOTE(review): the whole list is passed here; presumably
                 # _inttime_to_datetime expects a single int timestamp
                 # (e.g. max(last_update_i)) — confirm.
                 last_update = self._inttime_to_datetime(last_update_i)
             else:
                 # if last_update is not present in solr, consider last_modified of
                 # that document as last_update
                 if self.type in ['work', 'author']:
                     last_update = self.document.last_modified
                 else:
                     last_update = None
             return {
                 'ebook_count': sum(doc.get('ebook_count_i', 0) for doc in result.docs),
                 'edition_count': sum(doc.get('edition_count', 0) for doc in result.docs),
                 'work_count': 0,
                 'last_update': last_update
             }
     return {}
Ejemplo n.º 2
0
def random_ebooks(limit=2000):
    """Return dicts describing up to `limit` readable ebooks.

    Selects fulltext, public-scan works from solr (highest edition count
    first) and keeps only those with an archive.org identifier.
    """
    solr = search.get_works_solr()
    response = solr.select(
        query='has_fulltext:true -public_scan_b:false',
        rows=limit,
        sort="edition_count desc",
        fields=[
            'has_fulltext',
            'key',
            'ia',
            "title",
            "cover_edition_key",
            "author_key", "author_name",
        ])

    def as_summary(doc):
        summary = {
            'url': "/works/" + doc['key'],
            'title': doc.get('title', ''),
        }

        if 'author_key' in doc and 'author_name' in doc:
            author_pairs = zip(doc['author_key'], doc['author_name'])
            summary['authors'] = [{"key": ak, "name": an} for ak, an in author_pairs]

        if 'cover_edition_key' in doc:
            summary['cover_url'] = h.get_coverstore_url() + "/b/olid/%s-M.jpg" % doc['cover_edition_key']

        summary['read_url'] = "//archive.org/stream/" + doc['ia'][0]
        return summary

    return [as_summary(doc) for doc in response['docs'] if doc.get('ia')]
Ejemplo n.º 3
0
def random_ebooks(limit=2000):
    """Return dicts describing up to `limit` readable ebooks.

    Selects fulltext, public-scan works from solr (highest edition count
    first) and keeps only those with an archive.org identifier.
    """
    solr = search.get_works_solr()
    response = solr.select(
        query="has_fulltext:true -public_scan_b:false",
        rows=limit,
        sort="edition_count desc",
        fields=["has_fulltext", "key", "ia", "title", "cover_edition_key", "author_key", "author_name"],
    )

    def summarize(doc):
        work_key = doc["key"]
        # New solr stores the key as /works/OLxxxW
        if not work_key.startswith("/works/"):
            work_key = "/works/" + work_key

        entry = {
            "url": work_key,
            "title": doc.get("title", ""),
        }

        if "author_key" in doc and "author_name" in doc:
            author_pairs = zip(doc["author_key"], doc["author_name"])
            entry["authors"] = [{"key": ak, "name": an} for ak, an in author_pairs]

        if "cover_edition_key" in doc:
            entry["cover_url"] = h.get_coverstore_url() + "/b/olid/%s-M.jpg" % doc["cover_edition_key"]

        entry["read_url"] = "//archive.org/stream/" + doc["ia"][0]
        return entry

    return [summarize(doc) for doc in response["docs"] if doc.get("ia")]
Ejemplo n.º 4
0
def random_ebooks(limit=2000):
    """Return dicts describing up to `limit` readable ebooks.

    Selects fulltext, public-scan works from solr (highest edition count
    first) and keeps only those with an archive.org identifier.
    """
    solr = search.get_works_solr()
    response = solr.select(query='has_fulltext:true -public_scan_b:false',
                           rows=limit,
                           sort="edition_count desc",
                           fields=[
                               'has_fulltext',
                               'key',
                               'ia',
                               "title",
                               "cover_edition_key",
                               "author_key",
                               "author_name",
                           ])

    def build_entry(doc):
        entry = {
            'url': "/works/" + doc['key'],
            'title': doc.get('title', ''),
        }

        if 'author_key' in doc and 'author_name' in doc:
            entry['authors'] = [
                {"key": ak, "name": an}
                for ak, an in zip(doc['author_key'], doc['author_name'])
            ]

        if 'cover_edition_key' in doc:
            cover_olid = doc['cover_edition_key']
            entry['cover_url'] = h.get_coverstore_url(
            ) + "/b/olid/%s-M.jpg" % cover_olid

        entry['read_url'] = "http://www.archive.org/stream/" + doc['ia'][0]
        return entry

    return [build_entry(doc) for doc in response['docs'] if doc.get('ia')]
Ejemplo n.º 5
0
 def _load_solrdata(self):
     """Compute summary counts for this document.

     Returns a dict with 'ebook_count', 'edition_count', 'work_count' and
     'last_update' keys. An edition is counted directly; other types sum
     the counts over matching solr docs. Returns {} when no solr query
     term is available for this document.
     """
     if self.type == "edition":
         # A single edition of a single work; counts as an ebook only
         # when it has an archive.org identifier (ocaid).
         return {
             'ebook_count': int(bool(self.document.ocaid)),
             'edition_count': 1, 
             'work_count': 1,
             'last_update': self.document.last_modified
         }
     else:
         q = self.get_solr_query_term()
         if q:
             solr = get_works_solr()
             result = solr.select(q, fields=["edition_count", "ebook_count_i"])
             # NOTE(review): 'last_update_i' is not in the requested fields
             # list above, so this may always come back empty — confirm.
             last_update_i = [doc['last_update_i'] for doc in result.docs if 'last_update_i' in doc]
             if last_update_i:
                 # NOTE(review): the whole list is passed here; presumably
                 # _inttime_to_datetime expects a single int timestamp
                 # (e.g. max(last_update_i)) — confirm.
                 last_update = self._inttime_to_datetime(last_update_i)
             else:
                 # if last_update is not present in solr, consider last_modified of
                 # that document as last_update
                 if self.type in ['work', 'author']:
                     last_update = self.document.last_modified
                 else:
                     last_update = None
             return {
                 'ebook_count': sum(doc.get('ebook_count_i', 0) for doc in result.docs),
                 'edition_count': sum(doc.get('edition_count', 0) for doc in result.docs),
                 'work_count': 0,
                 'last_update': last_update
             }
     return {}
Ejemplo n.º 6
0
 def _get_edition_keys_from_solr(self, query_terms):
     """Yield "/books/..." keys for all editions matching any query term.

     Yields nothing when `query_terms` is empty.
     """
     if not query_terms:
         return
     combined_query = " OR ".join(query_terms)
     response = get_works_solr().select(combined_query, fields=["edition_key"], rows=10000)
     for doc in response['docs']:
         for edition_key in doc['edition_key']:
             yield "/books/" + edition_key
Ejemplo n.º 7
0
 def _get_edition_keys_from_solr(self, query_terms):
     """Yield "/books/..." keys for all editions matching any query term.

     Yields nothing when `query_terms` is empty.
     """
     if not query_terms:
         return
     solr = get_works_solr()
     query = " OR ".join(query_terms)
     response = solr.select(query, fields=["edition_key"], rows=10000)
     for record in response['docs']:
         yield from ("/books/" + olid for olid in record['edition_key'])
Ejemplo n.º 8
0
 def get_solr_query_term(self):
     """Return the solr query term matching this document.

     Returns None implicitly for any type not handled below.
     """
     if self.type == 'subject':
         facet, value = self.key.split(":", 1)
         # escaping value as it can have special chars like : etc.
         value = get_works_solr().escape(value)
         return "%s_key:%s" % (facet, value)
     if self.type == 'edition':
         return "edition_key:" + self._get_document_basekey()
     if self.type == 'work':
         return 'key:/works/' + self._get_document_basekey()
     if self.type == 'author':
         return "author_key:" + self._get_document_basekey()
Ejemplo n.º 9
0
 def get_solr_query_term(self):
     """Return the solr query term matching this document.

     Returns None implicitly for any type not handled below.
     """
     if self.type == 'subject':
         facet, value = self.key.split(":", 1)
         # escaping value as it can have special chars like : etc.
         value = get_works_solr().escape(value)
         return "%s_key:%s" % (facet, value)
     if self.type == 'edition':
         return "edition_key:" + self._get_document_basekey()
     if self.type == 'work':
         return 'key:' + self._get_document_basekey()
     if self.type == 'author':
         return "author_key:" + self._get_document_basekey()
Ejemplo n.º 10
0
    def _get_all_subjects(self):
        """Return subjects of the books in this list, most frequent first.

        Collects the subject/place/person/time facets of the solr query
        for this list and converts each facet value into a web.storage
        record with title/name/count/key/url fields. Returns [] when the
        list has too many seeds or the solr request fails.
        """
        solr = get_works_solr()
        q = self._get_solr_query_for_subjects()

        # Solr enforces a maxBooleanClauses limit; a query built from too
        # many seeds would exceed it, so skip the lookup entirely.
        if len(self.seeds) > 500:
            # logger.warn is a deprecated alias of logger.warning
            logger.warning(
                "More than 500 seeds. skipping solr query for finding subjects."
            )
            return []

        facet_names = [
            'subject_facet', 'place_facet', 'person_facet', 'time_facet'
        ]
        try:
            result = solr.select(q,
                                 fields=[],
                                 facets=facet_names,
                                 facet_limit=20,
                                 facet_mincount=1)
        except IOError:
            logger.error("Error in finding subjects of list %s",
                         self.key,
                         exc_info=True)
            return []

        def get_subject_prefix(facet_name):
            # Plain subjects have no prefix; other kinds are namespaced,
            # e.g. "place:", "person:", "time:".
            name = facet_name.replace("_facet", "")
            if name == 'subject':
                return ''
            else:
                return name + ":"

        def process_subject(facet_name, title, count):
            # Build the canonical subject key and url, e.g.
            # "place:new_york" -> "/subjects/place:new_york".
            prefix = get_subject_prefix(facet_name)
            key = prefix + title.lower().replace(" ", "_")
            url = "/subjects/" + key
            return web.storage({
                "title": title,
                "name": title,
                "count": count,
                "key": key,
                "url": url
            })

        def process_all():
            facets = result['facets']
            for k in facet_names:
                for f in facets.get(k, []):
                    yield process_subject(f.name, f.value, f.count)

        return sorted(process_all(), reverse=True, key=lambda s: s["count"])
Ejemplo n.º 11
0
    def _get_all_subjects(self):
        """Return subjects of the books in this list, most frequent first.

        Collects the subject/place/person/time facets of the solr query
        for this list and converts each facet value into a web.storage
        record with title/name/count/key/url fields. Returns [] when the
        list has too many seeds or the solr request fails.
        """
        solr = get_works_solr()
        q = self._get_solr_query_for_subjects()

        # Solr enforces a maxBooleanClauses limit; a query built from too
        # many seeds would exceed it, so skip the lookup entirely.
        if len(self.seeds) > 500:
            # logger.warn is a deprecated alias of logger.warning
            logger.warning("More than 500 seeds. skipping solr query for finding subjects.")
            return []

        facet_names = ['subject_facet', 'place_facet', 'person_facet', 'time_facet']
        try:
            result = solr.select(q, 
                fields=[], 
                facets=facet_names,
                facet_limit=20,
                facet_mincount=1)
        except IOError:
            logger.error("Error in finding subjects of list %s", self.key, exc_info=True)
            return []

        def get_subject_prefix(facet_name):
            # Plain subjects have no prefix; other kinds are namespaced,
            # e.g. "place:", "person:", "time:".
            name = facet_name.replace("_facet", "")
            if name == 'subject':
                return ''
            else:
                return name + ":"

        def process_subject(facet_name, title, count):
            # Build the canonical subject key and url, e.g.
            # "place:new_york" -> "/subjects/place:new_york".
            prefix = get_subject_prefix(facet_name)
            key = prefix + title.lower().replace(" ", "_")
            url = "/subjects/" + key
            return web.storage({
                "title": title,
                "name": title,
                "count": count,
                "key": key,
                "url": url
            })

        def process_all():
            facets = result['facets']
            for k in facet_names:
                for f in facets.get(k, []):
                    yield process_subject(f.name, f.value, f.count)

        return sorted(process_all(), reverse=True, key=lambda s: s["count"])
Ejemplo n.º 12
0
 def GET(self):
     """Sync the store's ebook borrow-status documents from solr.

     Fetches up to 100 works marked borrowed_b:false in solr, upserts one
     "ebook" store document per lending edition, and responds with plain
     text "ok".
     """
     from openlibrary.plugins.worksearch.search import get_works_solr
     # NOTE(review): other solr.select calls in this codebase pass rows=,
     # not limit= — confirm `limit` is a supported parameter here.
     result = get_works_solr().select(query='borrowed_b:false', fields=['key', 'lending_edition_s'], limit=100)
     
     def make_doc(d):
         # Makes a store doc from solr doc
         return {
             "_key": "ebooks/books/" + d['lending_edition_s'],
             "_rev": None, # Don't worry about consistency
             "type": "ebook",
             "book_key": "/books/" + d['lending_edition_s'],
             "borrowed": "false"
         }
     
     docs = [make_doc(d) for d in result['docs']]
     docdict = dict((d['_key'], d) for d in docs)
     web.ctx.site.store.update(docdict)
     return delegate.RawText("ok\n")
Ejemplo n.º 13
0
    def _get_all_subjects(self):
        """Return subjects of the books in this list, most frequent first.

        Collects the subject/place/person/time facets of the solr query
        for this list and converts each facet value into a web.storage
        record with title/name/count/key/url fields.
        """
        solr = get_works_solr()
        query = self._get_solr_query_for_subjects()
        facet_names = ['subject_facet', 'place_facet', 'person_facet', 'time_facet']
        result = solr.select(
            query,
            fields=[],
            facets=facet_names,
            facet_limit=20,
            facet_mincount=1)

        def prefix_for(facet_name):
            # Plain subjects have no prefix; other kinds are namespaced,
            # e.g. "place:", "person:", "time:".
            short = facet_name.replace("_facet", "")
            return '' if short == 'subject' else short + ":"

        def to_storage(facet_name, title, count):
            slug = prefix_for(facet_name) + title.lower().replace(" ", "_")
            return web.storage({
                "title": title,
                "name": title,
                "count": count,
                "key": slug,
                "url": "/subjects/" + slug
            })

        subjects = [
            to_storage(f.name, f.value, f.count)
            for facet_name in facet_names
            for f in result['facets'].get(facet_name, [])
        ]
        subjects.sort(key=lambda s: s["count"], reverse=True)
        return subjects