Exemple #1
0
def url_encode(query, clean=True, doseq=True, **kw):
    """Encode ``query`` merged with ``kw`` as a URL query string.

    :param query: mapping of parameter names to values.
    :param clean: when exactly ``True``, every entry whose value is falsy
        (``None``, ``''``, ``0``, empty list, ...) is dropped before encoding.
    :param doseq: forwarded unchanged to ``urllib.urlencode``.
    :param kw: extra parameters merged with ``query`` through ``web.dictadd``.
    :return: whatever ``urllib.urlencode`` returns for the merged mapping.
    """
    query = web.dictadd(query, kw)
    if clean is True:
        # Walk a snapshot of the items: removing keys while iterating the live
        # items view raises RuntimeError on Python 3.
        for key, value in list(query.items()):
            if not value:
                del query[key]
    return urllib.urlencode(query, doseq)
Exemple #2
0
    def __init__(self, from_address, to_address, subject, message, headers=None, **kw):
        """Collect addresses, headers and the body for an outgoing message.

        ``to_address`` and the optional ``cc`` / ``bcc`` keyword arguments may
        each be a single value or a list; every value is passed through
        ``safestr``. Entries in ``headers`` are merged with the generated
        From/To/Subject headers via ``dictadd``.
        """
        def as_str_list(value):
            # A lone value becomes a one-element list; list items are converted one by one.
            if isinstance(value, list):
                return [safestr(item) for item in value]
            return [safestr(value)]

        subject = safestr(subject)
        message = safestr(message)

        from_address = safestr(from_address)
        to_address = as_str_list(to_address)
        cc = as_str_list(kw.get('cc', []))
        bcc = as_str_list(kw.get('bcc', []))
        everyone = to_address + cc + bcc

        import email.Utils
        bare_address = email.Utils.parseaddr
        # Keep only the address part (second element of parseaddr's result).
        self.from_address = bare_address(from_address)[1]
        self.recipients = [bare_address(entry)[1] for entry in everyone]

        generated = {
          'From': from_address,
          'To': ", ".join(to_address),
          'Subject': subject
        }
        self.headers = dictadd(generated, headers or {})

        # bcc recipients are deliberately absent from the headers.
        if cc:
            self.headers['Cc'] = ", ".join(cc)

        self.message = self.new_message()
        fixed_headers = (
            ("Content-Transfer-Encoding", "7bit"),
            ("Content-Disposition", "inline"),
            ("MIME-Version", "1.0"),
        )
        for header_name, header_value in fixed_headers:
            self.message.add_header(header_name, header_value)
        self.message.set_payload(message, 'utf-8')
        self.multipart = False
Exemple #3
0
def url_encode(query, clean=True, doseq=True, **kw):
    """Encode ``query`` merged with ``kw`` as a URL query string.

    :param query: mapping of parameter names to values.
    :param clean: when exactly ``True``, every entry whose value is falsy
        (``None``, ``''``, ``0``, empty list, ...) is dropped before encoding.
    :param doseq: forwarded unchanged to ``urllib.urlencode``.
    :param kw: extra parameters merged with ``query`` through ``web.dictadd``.
    :return: whatever ``urllib.urlencode`` returns for the merged mapping.
    """
    query = web.dictadd(query, kw)
    if clean is True:
        # Walk a snapshot of the items: removing keys while iterating the live
        # items view raises RuntimeError on Python 3.
        for key, value in list(query.items()):
            if not value:
                del query[key]
    return urllib.urlencode(query, doseq)
Exemple #4
0
 def init_webpy_app(self, mapping=(), fvars={}, autoreload=None):
     """Initialize the underlying webpy application.

     Concatenates ``mapping`` with the MAPPING defined elsewhere, merges this
     method's local variables with ``fvars`` through ``web.dictadd``, and
     passes the results together with ``autoreload`` to the parent class
     ``__init__``.
     """
     # Extend the caller-supplied URL mapping with the default MAPPING.
     mapping = mapping + MAPPING
     # locals() at this point holds self, mapping, fvars and autoreload; they
     # are merged with the caller's fvars. The original author was unsure why
     # this is needed.
     # NOTE(review): which side wins on a key clash depends on web.dictadd -- confirm.
     fvars = web.dictadd(locals(), fvars)
     # Create the webpy app with these settings.
     super(CloudMiningApp, self).__init__(mapping, fvars, autoreload)
Exemple #5
0
 def init_webpy_app(self, mapping=(), fvars={}, autoreload=None):
     """Initialize the underlying webpy application.

     Concatenates ``mapping`` with the MAPPING defined elsewhere, merges this
     method's local variables with ``fvars`` through ``web.dictadd``, and
     passes the results together with ``autoreload`` to the parent class
     ``__init__``.
     """
     # Extend the caller-supplied URL mapping with the default MAPPING.
     mapping = mapping + MAPPING
     # locals() at this point holds self, mapping, fvars and autoreload; they
     # are merged with the caller's fvars. The original author was unsure why
     # this is needed.
     # NOTE(review): which side wins on a key clash depends on web.dictadd -- confirm.
     fvars = web.dictadd(locals(), fvars)
     # Create the webpy app with these settings.
     super(CloudMiningApp, self).__init__(mapping, fvars, autoreload)
Exemple #6
0
def url_encode2(inputs, clean=True, doseq=True, **kw):
    """Encode ``inputs`` merged with ``kw`` as a URL query string.

    Top-level ``unicode`` values are encoded to UTF-8 byte strings before
    being handed to ``urllib.urlencode``, regardless of ``clean``.

    :param inputs: mapping of parameter names to values.
    :param clean: when exactly ``True``, every entry whose value is falsy
        (``None``, ``''``, ``0``, empty list, ...) is dropped before encoding.
    :param doseq: forwarded unchanged to ``urllib.urlencode``.
    :param kw: extra parameters merged with ``inputs`` through ``web.dictadd``.
    :return: whatever ``urllib.urlencode`` returns for the processed mapping.
    """
    inputs = web.dictadd(inputs, kw)
    # Walk a snapshot so entries can be removed or replaced during the loop.
    for key, value in list(inputs.items()):
        if clean is True and not value:
            del inputs[key]
        # elif, not if: an empty unicode value that was just removed must not
        # be put back as an empty byte string.
        elif isinstance(value, unicode):
            inputs[key] = value.encode('utf-8')
    return urllib.urlencode(inputs, doseq)
Exemple #7
0
def url_encode2(inputs, clean=True, doseq=True, **kw):
    """Encode ``inputs`` merged with ``kw`` as a URL query string.

    Top-level ``unicode`` values are encoded to UTF-8 byte strings before
    being handed to ``urllib.urlencode``, regardless of ``clean``.

    :param inputs: mapping of parameter names to values.
    :param clean: when exactly ``True``, every entry whose value is falsy
        (``None``, ``''``, ``0``, empty list, ...) is dropped before encoding.
    :param doseq: forwarded unchanged to ``urllib.urlencode``.
    :param kw: extra parameters merged with ``inputs`` through ``web.dictadd``.
    :return: whatever ``urllib.urlencode`` returns for the processed mapping.
    """
    inputs = web.dictadd(inputs, kw)
    # Walk a snapshot so entries can be removed or replaced during the loop.
    for key, value in list(inputs.items()):
        if clean is True and not value:
            del inputs[key]
        # elif, not if: an empty unicode value that was just removed must not
        # be put back as an empty byte string.
        elif isinstance(value, unicode):
            inputs[key] = value.encode('utf-8')
    return urllib.urlencode(inputs, doseq)
Exemple #8
0
 def __init__(self, db, flash, **settings):
     """Store the collaborators, merge the settings and pick the hash function.

     :param db: stored as ``self._db``.
     :param flash: stored as ``self._flash``.
     :param settings: merged with DEFAULT_SETTINGS through ``web.dictadd``
         and exposed as ``self.config``.
     :raises HashError: if the configured ``hash`` setting is not one of
         ``sha512``, ``sha1`` or ``bcrypt``, or if selecting the hash function
         raises ImportError.
     """
     self._db = db
     self._flash = flash
     self.config = web.storage(web.dictadd(DEFAULT_SETTINGS, settings))
     hashtype = self.config.get('hash')
     try:
         # The hash functions are looked up one at a time so that only the
         # selected one has to be available.
         if hashtype == 'sha512':
             self.hash = hash_sha512
         elif hashtype == 'sha1':
             self.hash = hash_sha1
         elif hashtype == 'bcrypt':
             self.hash = hash_bcrypt
         else:
             raise HashError("Hash type must be sha512, sha1 or bcrypt")
     except ImportError:
         # Report the configured name; the previous code formatted the
         # ``hash`` builtin into the message instead.
         raise HashError('Hash type %s not available' % (hashtype,))
Exemple #9
0
    def process_doc(self, doc):
        """Build the output dict for one document.

        Requires self.authors and self.works to have been initialized first.
        """
        work = self.get_work(doc)

        def unwrap(value):
            # Dict values carry their payload under 'value'; anything else passes through.
            return value.get('value', '') if isinstance(value, dict) else value

        def make_subject(entry, prefix):
            label = entry
            if isinstance(entry, dict):
                # handle bad subjects loaded earlier.
                if 'value' in entry:
                    label = entry['value']
                elif 'key' in entry:
                    label = entry['key'].split("/")[-1].replace("_", " ")
                else:
                    return {}
            slug = label.lower().replace(" ", "_")
            return {
                "name": label,
                "url": "https://openlibrary.org/subjects/%s%s" % (prefix, slug)
            }

        def subjects_of(field, prefix):
            return [make_subject(item, prefix) for item in work.get(field, '')]

        def named(values):
            # Wrap each plain value as {"name": value}.
            return [{"name": value} for value in values]

        def excerpt_entry(raw):
            return {
                "text": unwrap(raw.get("excerpt", {})),
                "comment": raw.get("comment", "")
            }

        def link_entries():
            # Links without a url are skipped.
            return [
                dict(title=item.get("title"), url=item['url'])
                for item in work.get('links', '')
                if item.get('url')
            ]

        def toc_entries(toc):
            # after openlibrary.plugins.upstream.models.get_table_of_contents
            def normalize(item):
                if isinstance(item, six.string_types):
                    return dict(level=0, label="", title=item, pagenum="")
                return dict(
                    level=h.safeint(item.get('level', '0'), 0),
                    label=item.get('label', ''),
                    title=item.get('title', ''),
                    pagenum=item.get('pagenum', ''))

            normalized = [normalize(item) for item in toc]
            # Drop rows in which every field is empty.
            return [entry for entry in normalized if any(entry.values())]

        record = {
            "url": get_url(doc),
            "key": doc['key'],
            "title": doc.get("title", ""),
            "subtitle": doc.get("subtitle", ""),

            "authors": self.get_authors(work),

            "number_of_pages": doc.get("number_of_pages", ""),
            "pagination": doc.get("pagination", ""),

            "weight": doc.get("weight", ""),

            "by_statement": doc.get("by_statement", ""),

            'identifiers': web.dictadd(doc.get('identifiers', {}), {
                'isbn_10': doc.get('isbn_10', []),
                'isbn_13': doc.get('isbn_13', []),
                'lccn': doc.get('lccn', []),
                'oclc': doc.get('oclc_numbers', []),
                'openlibrary': [doc['key'].split("/")[-1]]
            }),

            'classifications': web.dictadd(doc.get('classifications', {}), {
                'lc_classifications': doc.get('lc_classifications', []),
                'dewey_decimal_class': doc.get('dewey_decimal_class', [])
            }),

            "publishers": named(doc.get("publishers", "")),
            "publish_places": named(doc.get("publish_places", "")),
            "publish_date": doc.get("publish_date"),

            "subjects": subjects_of("subjects", ""),
            "subject_places": subjects_of("subject_places", "place:"),
            "subject_people": subjects_of("subject_people", "person:"),
            "subject_times": subjects_of("subject_times", "time:"),
            "excerpts": [excerpt_entry(raw) for raw in work.get("excerpts", [])],

            "notes": unwrap(doc.get("notes", "")),
            "table_of_contents": toc_entries(doc.get("table_of_contents", [])),

            "links": link_entries(),
        }

        # The document's own first sentence takes priority over the work's.
        candidates = [doc.get("first_sentence"), work.get('first_sentence')]
        first_sentence = next((c for c in candidates if c), None)
        if first_sentence:
            leading_excerpt = {
                "text": unwrap(first_sentence),
                "comment": "",
                "first_sentence": True
            }
            record['excerpts'].insert(0, leading_excerpt)

        def describe_ebook(edition):
            ocaid = edition['ocaid']
            status = get_ia_availability(ocaid)

            info = {
                "preview_url": "https://archive.org/details/" + ocaid,
                "availability": status,
                "formats": {}
            }

            download_base = "https://archive.org/download/%s/%s" % (ocaid, ocaid)
            if status == 'full':
                info["read_url"] = "https://archive.org/stream/%s" % (ocaid)
                info['formats'] = {
                    "pdf": {"url": download_base + ".pdf"},
                    "epub": {"url": download_base + ".epub"},
                    "text": {"url": download_base + "_djvu.txt"}
                }
            elif status == "borrow":
                title_slug = h.urlsafe(edition.get("title", "untitled"))
                info['borrow_url'] = u"https://openlibrary.org%s/%s/borrow" % (edition['key'], title_slug)
                loan = web.ctx.site.store.get('ebooks/' + ocaid, {'borrowed': 'false'})
                info['checkedout'] = (loan['borrowed'] == 'true')

            return info

        if doc.get("ocaid"):
            record['ebooks'] = [describe_ebook(doc)]

        covers = doc.get('covers')
        if covers:
            # Only the first cover is exposed.
            cover_id = covers[0]
            record['cover'] = {
                "small": "https://covers.openlibrary.org/b/id/%s-S.jpg" % cover_id,
                "medium": "https://covers.openlibrary.org/b/id/%s-M.jpg" % cover_id,
                "large": "https://covers.openlibrary.org/b/id/%s-L.jpg" % cover_id,
            }

        for nested in ('identifiers', 'classifications'):
            record[nested] = trim(record[nested])
        return trim(record)
Exemple #10
0
    def process_doc(self, doc):
        """Build the output dict for one document.

        Requires self.authors and self.works to have been initialized first.
        """
        work = self.get_work(doc)

        def unwrap(value):
            # Dict values carry their payload under 'value'; anything else passes through.
            return value.get('value', '') if isinstance(value, dict) else value

        def make_subject(entry, prefix):
            label = entry
            if isinstance(entry, dict):
                # handle bad subjects loaded earlier.
                if 'value' in entry:
                    label = entry['value']
                elif 'key' in entry:
                    label = entry['key'].split("/")[-1].replace("_", " ")
                else:
                    return {}
            slug = label.lower().replace(" ", "_")
            return {
                "name": label,
                "url": "http://openlibrary.org/subjects/%s%s" % (prefix, slug)
            }

        def subjects_of(field, prefix):
            return [make_subject(item, prefix) for item in work.get(field, '')]

        def named(values):
            # Wrap each plain value as {"name": value}.
            return [{"name": value} for value in values]

        def excerpt_entry(raw):
            return {
                "text": unwrap(raw.get("excerpt", {})),
                "comment": raw.get("comment", "")
            }

        def link_entries():
            # Links without a url are skipped.
            return [
                dict(title=item.get("title"), url=item['url'])
                for item in work.get('links', '')
                if item.get('url')
            ]

        record = {
            "url": get_url(doc),
            "key": doc['key'],
            "title": doc.get("title", ""),
            "subtitle": doc.get("subtitle", ""),

            "authors": self.get_authors(work),

            "number_of_pages": doc.get("number_of_pages", ""),
            "pagination": doc.get("pagination", ""),

            "weight": doc.get("weight", ""),

            "by_statement": doc.get("by_statement", ""),

            'identifiers': web.dictadd(doc.get('identifiers', {}), {
                'isbn_10': doc.get('isbn_10', []),
                'isbn_13': doc.get('isbn_13', []),
                'lccn': doc.get('lccn', []),
                'oclc': doc.get('oclc_numbers', []),
                'openlibrary': [doc['key'].split("/")[-1]]
            }),

            'classifications': web.dictadd(doc.get('classifications', {}), {
                'lc_classifications': doc.get('lc_classifications', []),
                'dewey_decimal_class': doc.get('dewey_decimal_class', [])
            }),

            "publishers": named(doc.get("publishers", "")),
            "publish_places": named(doc.get("publish_places", "")),
            "publish_date": doc.get("publish_date"),

            "subjects": subjects_of("subjects", ""),
            "subject_places": subjects_of("subject_places", "place:"),
            "subject_people": subjects_of("subject_people", "person:"),
            "subject_times": subjects_of("subject_times", "time:"),
            "excerpts": [excerpt_entry(raw) for raw in work.get("excerpts", [])],
            "links": link_entries(),
        }

        def describe_ebook(edition):
            ocaid = edition['ocaid']
            status = get_ia_availability(ocaid)

            info = {
                "preview_url": "http://www.archive.org/details/" + ocaid,
                "availability": status
            }

            download_base = "http://www.archive.org/download/%s/%s" % (ocaid, ocaid)
            if status == 'full':
                info["read_url"] = "http://www.archive.org/stream/%s" % (ocaid)
                info['formats'] = {
                    "pdf": {"url": download_base + ".pdf"},
                    "epub": {"url": download_base + ".epub"},
                    "text": {"url": download_base + "_djvu.txt"},
                    "djvu": {
                        "url": download_base + ".djvu",
                        "permission": "open"
                    }
                }
                return info

            # Every other availability exposes only a restricted djvu entry;
            # borrowable items additionally get a borrow link.
            if status == "borrow":
                title_slug = h.urlsafe(edition.get("title", "untitled"))
                info['borrow_url'] = u"http://openlibrary.org%s/%s/borrow" % (edition['key'], title_slug)
            info['formats'] = {
                "djvu": {
                    "url": download_base + ".djvu",
                    "permission": "restricted"
                }
            }
            return info

        if doc.get("ocaid"):
            record['ebooks'] = [describe_ebook(doc)]

        covers = doc.get('covers')
        if covers:
            # Only the first cover is exposed.
            cover_id = covers[0]
            record['cover'] = {
                "small": "http://covers.openlibrary.org/b/id/%s-S.jpg" % cover_id,
                "medium": "http://covers.openlibrary.org/b/id/%s-M.jpg" % cover_id,
                "large": "http://covers.openlibrary.org/b/id/%s-L.jpg" % cover_id,
            }

        for nested in ('identifiers', 'classifications'):
            record[nested] = trim(record[nested])
        return trim(record)
Exemple #11
0
    def process_doc(self, doc):
        """Build the output dict for one document.

        Requires self.authors and self.works to have been initialized first.
        """
        work = self.get_work(doc)

        def unwrap(value):
            # Dict values carry their payload under 'value'; anything else passes through.
            return value.get('value', '') if isinstance(value, dict) else value

        def make_subject(entry, prefix):
            label = entry
            if isinstance(entry, dict):
                # handle bad subjects loaded earlier.
                if 'value' in entry:
                    label = entry['value']
                elif 'key' in entry:
                    label = entry['key'].split("/")[-1].replace("_", " ")
                else:
                    return {}
            slug = label.lower().replace(" ", "_")
            return {
                "name": label,
                "url": "https://openlibrary.org/subjects/%s%s" % (prefix, slug)
            }

        def subjects_of(field, prefix):
            return [make_subject(item, prefix) for item in work.get(field, '')]

        def named(values):
            # Wrap each plain value as {"name": value}.
            return [{"name": value} for value in values]

        def excerpt_entry(raw):
            return {
                "text": unwrap(raw.get("excerpt", {})),
                "comment": raw.get("comment", "")
            }

        def link_entries():
            # Links without a url are skipped.
            return [
                dict(title=item.get("title"), url=item['url'])
                for item in work.get('links', '')
                if item.get('url')
            ]

        def toc_entries(toc):
            # after openlibrary.plugins.upstream.models.get_table_of_contents
            def normalize(item):
                if isinstance(item, basestring):
                    return dict(level=0, label="", title=item, pagenum="")
                return dict(
                    level=h.safeint(item.get('level', '0'), 0),
                    label=item.get('label', ''),
                    title=item.get('title', ''),
                    pagenum=item.get('pagenum', ''))

            normalized = [normalize(item) for item in toc]
            # Drop rows in which every field is empty.
            return [entry for entry in normalized if any(entry.values())]

        record = {
            "url": get_url(doc),
            "key": doc['key'],
            "title": doc.get("title", ""),
            "subtitle": doc.get("subtitle", ""),

            "authors": self.get_authors(work),

            "number_of_pages": doc.get("number_of_pages", ""),
            "pagination": doc.get("pagination", ""),

            "weight": doc.get("weight", ""),

            "by_statement": doc.get("by_statement", ""),

            'identifiers': web.dictadd(doc.get('identifiers', {}), {
                'isbn_10': doc.get('isbn_10', []),
                'isbn_13': doc.get('isbn_13', []),
                'lccn': doc.get('lccn', []),
                'oclc': doc.get('oclc_numbers', []),
                'openlibrary': [doc['key'].split("/")[-1]]
            }),

            'classifications': web.dictadd(doc.get('classifications', {}), {
                'lc_classifications': doc.get('lc_classifications', []),
                'dewey_decimal_class': doc.get('dewey_decimal_class', [])
            }),

            "publishers": named(doc.get("publishers", "")),
            "publish_places": named(doc.get("publish_places", "")),
            "publish_date": doc.get("publish_date"),

            "subjects": subjects_of("subjects", ""),
            "subject_places": subjects_of("subject_places", "place:"),
            "subject_people": subjects_of("subject_people", "person:"),
            "subject_times": subjects_of("subject_times", "time:"),
            "excerpts": [excerpt_entry(raw) for raw in work.get("excerpts", [])],

            "notes": unwrap(doc.get("notes", "")),
            "table_of_contents": toc_entries(doc.get("table_of_contents", [])),

            "links": link_entries(),
        }

        # The document's own first sentence takes priority over the work's.
        candidates = [doc.get("first_sentence"), work.get('first_sentence')]
        first_sentence = next((c for c in candidates if c), None)
        if first_sentence:
            leading_excerpt = {
                "text": unwrap(first_sentence),
                "comment": "",
                "first_sentence": True
            }
            record['excerpts'].insert(0, leading_excerpt)

        def describe_ebook(edition):
            ocaid = edition['ocaid']
            status = get_ia_availability(ocaid)

            info = {
                "preview_url": "https://archive.org/details/" + ocaid,
                "availability": status,
                "formats": {}
            }

            download_base = "https://archive.org/download/%s/%s" % (ocaid, ocaid)
            if status == 'full':
                info["read_url"] = "https://archive.org/stream/%s" % (ocaid)
                info['formats'] = {
                    "pdf": {"url": download_base + ".pdf"},
                    "epub": {"url": download_base + ".epub"},
                    "text": {"url": download_base + "_djvu.txt"}
                }
            elif status == "borrow":
                title_slug = h.urlsafe(edition.get("title", "untitled"))
                info['borrow_url'] = u"https://openlibrary.org%s/%s/borrow" % (edition['key'], title_slug)
                loan = web.ctx.site.store.get('ebooks/' + ocaid, {'borrowed': 'false'})
                info['checkedout'] = (loan['borrowed'] == 'true')

            return info

        if doc.get("ocaid"):
            record['ebooks'] = [describe_ebook(doc)]

        covers = doc.get('covers')
        if covers:
            # Only the first cover is exposed.
            cover_id = covers[0]
            record['cover'] = {
                "small": "https://covers.openlibrary.org/b/id/%s-S.jpg" % cover_id,
                "medium": "https://covers.openlibrary.org/b/id/%s-M.jpg" % cover_id,
                "large": "https://covers.openlibrary.org/b/id/%s-L.jpg" % cover_id,
            }

        for nested in ('identifiers', 'classifications'):
            record[nested] = trim(record[nested])
        return trim(record)
Exemple #12
0
    def process_doc(self, doc):
        """Build the output dict for one document.

        Requires self.authors and self.works to have been initialized first.
        """
        work = self.get_work(doc)

        def unwrap(value):
            # Dict values carry their payload under 'value'; anything else passes through.
            return value.get('value', '') if isinstance(value, dict) else value

        def make_subject(entry, prefix):
            label = entry
            if isinstance(entry, dict):
                # handle bad subjects loaded earlier.
                if 'value' in entry:
                    label = entry['value']
                elif 'key' in entry:
                    label = entry['key'].split("/")[-1].replace("_", " ")
                else:
                    return {}
            slug = label.lower().replace(" ", "_")
            return {
                "name": label,
                "url": "http://openlibrary.org/subjects/%s%s" % (prefix, slug)
            }

        def subjects_of(field, prefix):
            return [make_subject(item, prefix) for item in work.get(field, '')]

        def named(values):
            # Wrap each plain value as {"name": value}.
            return [{"name": value} for value in values]

        def excerpt_entry(raw):
            return {
                "text": unwrap(raw.get("excerpt", {})),
                "comment": raw.get("comment", "")
            }

        def link_entries():
            # Links without a url are skipped.
            return [
                dict(title=item.get("title"), url=item['url'])
                for item in work.get('links', '')
                if item.get('url')
            ]

        record = {
            "url": get_url(doc),
            "title": doc.get("title", ""),
            "subtitle": doc.get("subtitle", ""),

            "authors": self.get_authors(work),

            "number_of_pages": doc.get("number_of_pages", ""),
            "weight": doc.get("weight", ""),

            'identifiers': web.dictadd(doc.get('identifiers', {}), {
                'isbn_10': doc.get('isbn_10', []),
                'isbn_13': doc.get('isbn_13', []),
                'lccn': doc.get('lccn', []),
                'oclc': doc.get('oclc_numbers', []),
            }),

            'classifications': web.dictadd(doc.get('classifications', {}), {
                'lc_classifications': doc.get('lc_classifications', []),
                'dewey_decimal_class': doc.get('dewey_decimal_class', [])
            }),

            "publishers": named(doc.get("publishers", "")),
            "publish_places": named(doc.get("publish_places", "")),
            "publish_date": doc.get("publish_date"),

            "subjects": subjects_of("subjects", ""),
            "subject_places": subjects_of("subject_places", "place:"),
            "subject_people": subjects_of("subject_people", "person:"),
            "subject_times": subjects_of("subject_times", "time:"),
            "excerpts": [excerpt_entry(raw) for raw in work.get("excerpts", [])],
            "links": link_entries(),
        }

        ocaid = doc.get("ocaid")
        if ocaid:
            preview = {"preview_url": "http://www.archive.org/details/" + ocaid}
            record['ebooks'] = [preview]

        covers = doc.get('covers')
        if covers:
            # Only the first cover is exposed.
            cover_id = covers[0]
            record['cover'] = {
                "small": "http://covers.openlibrary.org/b/id/%s-S.jpg" % cover_id,
                "medium": "http://covers.openlibrary.org/b/id/%s-M.jpg" % cover_id,
                "large": "http://covers.openlibrary.org/b/id/%s-L.jpg" % cover_id,
            }

        for nested in ('identifiers', 'classifications'):
            record[nested] = trim(record[nested])
        return trim(record)