Example #1
 def _edition(self):
     if self.metadata.get("_edition", ''):
         return unicodestr(self.metadata["_edition"])
     elif self.get_one_identifier('isbn'):
         return unicodestr(
             self.get_one_identifier('isbn'))  # use first ISBN if available
     elif self._repo:
         return edition_name_from_repo(self._repo)
     else:
         return 'book'  # this will be the default file name
Example #2
 def set_edition_id(self):
     # set a (hopefully globally unique) edition identifier
     if 'edition_identifiers' not in self.metadata:
         self.metadata['edition_identifiers'] = {}
     base = self.url
     if not base:
         try:
             base = unicodestr(list(self.identifiers.keys())[0]) + ':' + unicodestr(
                 list(self.identifiers.values())[0])
         except (AttributeError, IndexError):
             base = u'repo:' + unicodestr(self._repo)
     self.metadata['edition_identifiers'][
         'edition_id'] = base + '#' + self._edition
Example #3
def htm_modified(file_path):
    g = rdflib.Graph()
    try:
        g.load(file_path)
    except IOError:
        return None

    ld = serializer.from_rdf(g, context_data=context, base=None,
            use_native_types=False, use_rdf_type=False,
            auto_compact=False, startnode=None, index=False)

    graph = ld['@graph']
    nodes = {}
    for obj in graph:
        if isinstance(obj, dict):
            obj = obj.copy()
            if "@id" in obj and obj["@id"].startswith("_"):
                nodeid = obj["@id"]
                node = nodes.get(nodeid, {})
                del obj["@id"]
                node.update(obj)
                nodes[nodeid] = node
            
    # return the modification date of the .htm file, if the RDF lists one
    for obj in unblank_node(graph, nodes):
        try:
            if obj[u'@type'] == u'pgterms:file':
                if unicodestr(obj[u'@id']).endswith('.htm'):
                    return obj[u'dcterms:modified'][u'@value']
        except (KeyError, TypeError):
            continue
Example #4
def get_url(key, val, entities=None):
    if isinstance(val, list):
        return (key, [get_url(key, item, entities=entities)[1] for item in val])
    try:
        return (key, unicodestr(val['@id']))
    except KeyError:
        return None
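
A minimal check of how get_url behaves on JSON-LD values; it assumes unicodestr is simply str on Python 3, and the node data is made up:

unicodestr = str  # assumed Python 3 stand-in for the project's unicodestr helper

def get_url(key, val, entities=None):
    # same logic as the example above, repeated so this sketch runs on its own
    if isinstance(val, list):
        return (key, [get_url(key, item)[1] for item in val])
    try:
        return (key, unicodestr(val['@id']))
    except KeyError:
        return None

print(get_url('creator', {'@id': 'http://example.org/agents/1'}))
# ('creator', 'http://example.org/agents/1')
print(get_url('subjects', [{'@id': 'http://example.org/s/1'}, {'@id': 'http://example.org/s/2'}]))
# ('subjects', ['http://example.org/s/1', 'http://example.org/s/2'])
print(get_url('title', {'@value': 'no @id here'}))
# None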
Example #5
def strip_controls(_string):
    out = []
    _string = unicodestr(_string)
    for ch in _string:
        if unicodedata.category(ch)[0] != 'C':  # not a control character
            out.append(ch)
        elif ch in u'\r\n\t':  # allow whitespace
            out.append(ch)
    return u''.join(out)
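
A small self-contained sketch of the same idea, assuming unicodestr is just str on Python 3 (the project's real alias is defined elsewhere and not shown here):

import unicodedata

unicodestr = str  # assumed Python 3 stand-in for the project's unicodestr helper

def strip_controls(_string):
    # keep everything that is not a Unicode control character,
    # but let the common whitespace controls \r, \n and \t through
    out = []
    for ch in unicodestr(_string):
        if unicodedata.category(ch)[0] != 'C' or ch in u'\r\n\t':
            out.append(ch)
    return u''.join(out)

print(repr(strip_controls(u'page\x0cbreak\tand\x00null\r\n')))
# 'pagebreak\tandnull\r\n' -- form feed and NUL dropped, tab/CR/LF kept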
Example #6
def pg_rdf_to_json(file_path):
    g = rdflib.Graph()
    g.load(file_path)

    #print(g.serialize(format='json-ld', indent=4, context=context))
    ld = serializer.from_rdf(g, context_data=context, base=None,
            use_native_types=False, use_rdf_type=False,
            auto_compact=False, startnode=None, index=False)


    graph = ld['@graph']
    #print(json.dumps(graph,indent=2, separators=(',', ': '), sort_keys=True))

    # fold the properties of blank nodes (@id starting with "_") into one dict per node id
    nodes = {}
    for obj in graph:
        if isinstance(obj, dict):
            obj = obj.copy()
            if "@id" in obj and obj["@id"].startswith("_"):
                nodeid = obj["@id"]
                node = nodes.get(nodeid, {})
                del obj["@id"]
                node.update(obj)
                nodes[nodeid] = node
            
    # now remove the blank nodes and the files
    newnodes = []
    top = None
    for obj in unblank_node(graph, nodes):
        try:
            if obj['@type'] == 'pgterms:file':
                continue
            elif obj['@type'] == 'pgterms:ebook':
                top = obj
            elif '@id' in obj and unicodestr(obj['@id']) == 'http://www.gutenberg.org/':
                continue
            else:
                newnodes.append(obj)
        except KeyError:
            continue

    #print(json.dumps(top,indent=2, separators=(',', ': '), sort_keys=True))
    
    entities = {}
    for node in newnodes:
        node_id = node.get('@id', None)
        if node_id:
            entities[node_id] = mapdata(node, pandata_map, entities)
    for adder in pandata_adders:
        adder(top, entities)
    top2 = mapdata(top, pandata_map, entities)
    for postprocessor in postprocessors:
        postprocessor(top2)
    return top2
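
To make the blank-node handling easier to follow, here is a self-contained sketch of the first loop above applied to toy JSON-LD entries; the data is made up and unblank_node itself is not part of this listing:

# Toy JSON-LD graph: one blank node split across two entries, plus a named node.
graph = [
    {"@id": "_:b0", "pgterms:name": "Austen, Jane"},
    {"@id": "_:b0", "pgterms:birthdate": "1775"},
    {"@id": "http://www.gutenberg.org/ebooks/1342", "@type": "pgterms:ebook"},
]

# Same merging logic as above: collect every entry whose @id starts with "_"
# and fold its properties into a single dict keyed by the blank-node id.
nodes = {}
for obj in graph:
    if isinstance(obj, dict):
        obj = obj.copy()
        if "@id" in obj and obj["@id"].startswith("_"):
            nodeid = obj["@id"]
            node = nodes.get(nodeid, {})
            del obj["@id"]
            node.update(obj)
            nodes[nodeid] = node

print(nodes)
# {'_:b0': {'pgterms:name': 'Austen, Jane', 'pgterms:birthdate': '1775'}}
# unblank_node (not shown in this listing) presumably substitutes these merged
# dicts wherever the graph refers to a blank node by id.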
Example #7
    def __init__(self, path, threaded=True):
        LockBase.__init__(self, path, threaded)
        self.lock_file = unicodestr(self.lock_file)
        self.unique_name = unicodestr(self.unique_name)

        import sqlite3
        self.connection = sqlite3.connect(SQLiteFileLock.testdb)

        c = self.connection.cursor()
        try:
            c.execute("create table locks"
                      "("
                      "   lock_file varchar(32),"
                      "   unique_name varchar(32)"
                      ")")
        except sqlite3.OperationalError:
            pass
        else:
            self.connection.commit()
            import atexit
            atexit.register(os.unlink, SQLiteFileLock.testdb)
Example #8
 def asciify(_title):
     _title = unicodedata.normalize('NFD', unicodestr(_title))
     ascii = True
     out = []
     ok = u"1234567890qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM- ',"
     for ch in _title:
         if ch in ok:
             out.append(ch)
         elif unicodedata.category(ch)[0] == "L":  # a letter
             out.append(hex(ord(ch)))
             ascii = False
         elif ch in u'\r\n\t':
             out.append(u'-')
     return (ascii, sub("[ ',-]+", '-', "".join(out)))
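
A quick illustration of asciify's return value; it assumes the function above is in scope with import unicodedata and from re import sub at module level, and unicodestr aliased to str on Python 3. The titles are made up:

print(asciify(u'Crème brûlée'))
# (True, 'Creme-brulee') -- NFD separates the accents, the loop drops them,
#                           and spaces collapse to hyphens
print(asciify(u'Δ x'))
# (False, '0x394-x') -- a non-ASCII letter is replaced by its hex code point
#                       and the ascii flag flips to False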
Example #9
 def get_repos_to_upload(self):
     pg_id = int(last_pgid) + 1
     more = True
     pg_ids = []
     while more:
         new_rdffile = os.path.join(self.rdf_library_dir, 'cache', 'epub',
                                    unicodestr(pg_id),
                                    'pg{}.rdf'.format(pg_id))
         if os.path.exists(new_rdffile):
             pg_ids.append(pg_id)
             pg_id += 1
         else:
             if pg_id in missing_pgid:
                 pg_id += 1
             else:
                 more = False
     return pg_ids
Example #10
    def _update_tags(self, runinfo):
        if not runinfo:
            return

        tags_to_add = []

        for choice in self.question.choices():
            tags = choice.tags
            if not tags:
                continue
            tags = tags.split(',')
            runinfo.remove_tags(tags)

            for split_answer in self.split_answer():
                if unicodestr(split_answer) == choice.value:
                    tags_to_add.extend(tags)

        runinfo.add_tags(tags_to_add)
        runinfo.save()
Example #11
 def test_new_epub(self):
     self.randomf = '%012x.epub' % random.randrange(16**12)  # random name
     epub = EPUB(self.randomf, mode='w')
     epub.addmetadata('test', 'GOOD')
     uxml = u'<?xml version="1.0" encoding="utf-8" standalone="yes"?><test>VojtěchVojtíšek</test>'
     part = StringIO(unicodestr(uxml))
     epub.addpart(part, "testpart.xhtml", "application/xhtml+xml", 2)
     epub.close()
     epub = EPUB(self.randomf, mode='r')
     self.assertEqual(len(epub.opf), 4)  # opf length
     self.assertEqual(len(epub.opf[0]), 6)  # metadata
     self.assertEqual(len(epub.opf[1]), 2)  # manifest
     self.assertEqual(len(epub.opf[2]), 1)  # spine
     self.assertEqual(len(epub.opf[3]), 0)  # guide
     with open(self.randomf, 'r+b') as epub_from_file:
         epub = EPUB(epub_from_file, mode='a')
         epub.addpart(part, "testpart2.xhtml", "application/xhtml+xml", 3)
         epub.close()
     epub = EPUB(self.randomf, mode='a')
     epub.addmetadata('test2', 'GOOD')
     epub.writetodisk('testwrite.epub')
Example #12
    def additem(self, fileObject, href, mediatype):
        """
        Add a file to manifest only

        :type fileObject: BytesIO
        :param fileObject:
        :type href: unicodestr
        :param href:
        :type mediatype: unicodestr
        :param mediatype:
        """
        assert self.epub_mode != "r", "%s is not writable" % self
        element = ET.Element(
            NAMESPACE.get("opf") + "item",
            attrib={"id": "id_" + unicodestr(uuid.uuid4())[:5], "href": href, "media-type": mediatype}
        )

        try:
            self._writestr(os.path.join(self.root_folder, element.attrib["href"]), fileObject.getvalue().encode('utf-8'))
        except AttributeError:
            self._writestr(os.path.join(self.root_folder, element.attrib["href"]), fileObject)
        self.opf[1].append(element)
        return element.attrib["id"]
Example #13
def question_timeperiod(request, question):
    cd = question.getcheckdict()
    if "units" in cd:
        units = cd["units"].split(',')
    else:
        units = ["day", "week", "month", "year"]
    timeperiods = []
    if not units:
        units = ["day", "week", "month", "year"]

    key1 = "question_%s" % question.number
    key2 = "question_%s_unit" % question.number
    value = request.POST.get(key1, '')
    unitselected = request.POST.get(key2, units[0])

    for x in units:
        if x in perioddict:
            timeperiods.append(
                (x, unicodestr(perioddict[x]), unitselected == x))
    return {
        "required": "required" in cd,
        "timeperiods": timeperiods,
        "value": value,
    }
Example #14
def get_id(key, val, entities=None):
    return (key, unicodestr(val))
Example #15
def send_now(users, label, extra_context=None, on_site=True, sender=None):
    """
    Creates a new notice.
    
    This is intended to be how other apps create new notices.
    
    notification.send(user, "friends_invite_sent", {
        "spam": "eggs",
        "foo": "bar",
    })
    
    You can pass in on_site=False to prevent the notice emitted from being
    displayed on the site.
    """
    if extra_context is None:
        extra_context = {}

    notice_type = NoticeType.objects.get(label=label)

    protocol = getattr(settings, "DEFAULT_HTTP_PROTOCOL", "http")
    current_site = Site.objects.get_current()

    notices_url = u"%s://%s%s" % (
        protocol,
        unicodestr(current_site),
        reverse("notification_notices"),
    )

    current_language = get_language()

    formats = (
        "short.txt",
        "full.txt",
        "notice.html",
        "full.html",
    )  # TODO make formats configurable

    for user in users:
        recipients = []
        # get user language for user from language store defined in
        # NOTIFICATION_LANGUAGE_MODULE setting
        try:
            language = get_notification_language(user)
        except LanguageStoreNotAvailable:
            language = None

        if language is not None:
            # activate the user's language
            activate(language)

        # update context with user specific translations
        context = {
            "recipient": user,
            "sender": sender,
            "notice": ugettext(notice_type.display),
            "notices_url": notices_url,
            "current_site": current_site,
        }
        context.update(extra_context)

        # get prerendered format messages
        messages = get_formatted_messages(formats, label, context)

        context['message'] = messages["short.txt"]
        subject = render_to_string("notification/email_subject.txt", context)
        # Strip newlines from subject
        subject = "".join(subject.splitlines())
        context['message'] = messages["full.txt"]
        body = render_to_string("notification/email_body.txt", context)

        notice = Notice.objects.create(recipient=user,
                                       message=messages["notice.html"],
                                       notice_type=notice_type,
                                       on_site=on_site,
                                       sender=sender)
        if should_send(user, notice_type,
                       "1") and user.email and user.is_active:  # Email
            recipients.append(user.email)
        send_mail(subject, body, settings.DEFAULT_FROM_EMAIL, recipients)

    # reset environment to original language
    activate(current_language)
Example #16
def stub(pandata):

    record = pymarc.Record(force_utf8=True)

    now = datetime.now()

    #mostly to identify this record as a 'stub'
    record.add_ordered_field(
        pymarc.Field(tag='001', data='stb' + now.strftime('%y%m%d%H%M%S')))

    add_stuff(record)

    # fun fun fun 008
    new_field_value = now.strftime('%y%m%d') + 's'
    publication_date = pandata.gutenberg_issued  # library cataloging will consider "Project Gutenberg" to be the publisher of the edition
    if publication_date and len(
            publication_date) > 3:  # must be at least a year
        new_field_value += publication_date[0:4]
    else:
        new_field_value += '||||'
    new_field_value += '||||xx |||||o|||||||||||eng||'
    record.add_ordered_field(pymarc.Field(tag='008', data=new_field_value))

    identifiers = pandata.identifiers
    # add ISBNs if available
    isbn = identifiers.get(
        'isbn', None)  # most isbns in PG are really for related editions
    if isbn:
        record.add_ordered_field(
            pymarc.Field(tag='020',
                         indicators=[' ', ' '],
                         subfields=['a', isbn]))
    related = identifiers.get('isbns_related', [])
    for isbn in related:
        record.add_ordered_field(
            pymarc.Field(tag='020',
                         indicators=[' ', ' '],
                         subfields=['a', isbn + ' (related)']))

    # OCLC number
    oclc = identifiers.get('oclc', None)
    if oclc:
        record.add_ordered_field(
            pymarc.Field(tag='035',
                         indicators=[' ', ' '],
                         subfields=['a', '(OCoLC)' + str(oclc)]))

    # contributors
    # use marc codes from http://www.loc.gov/marc/relators/relaterm.html
    creators = []
    # heuristically decide the "main entry", the first creator
    for marc_type in main_entries:
        creator = pandata.creator.get(marc_rels.get(marc_type), None)
        if creator:
            creators.append((marc_type, creator))
        else:
            creator = pandata.creator.get(marc_rels.get(plural(marc_type)), [])
            for each_creator in creator:
                creators.append((marc_type, each_creator))

    if creators:
        (marc_code, creator) = creators[0]
        sortname = creator.get('agent_sortname', '')
        if not sortname:
            sortname = reverse_name(creator.get('agent_name', ''))
        record.add_ordered_field(
            pymarc.Field(tag='100',
                         indicators=['1', ' '],
                         subfields=[
                             'a',
                             sortname,
                             '4',
                             marc_code,
                         ]))

    #language
    if pandata.language:
        is_translation = '1' if pandata.translators else '0'
        record.add_ordered_field(
            pymarc.Field(tag='041',
                         indicators=[is_translation, 'iso639-1'],
                         subfields=['a', pandata.language]))
    contributors = creators[1:] if creators else []
    for contributor_type in pandata.contributor.keys():
        contributor = pandata.contributor[contributor_type]  #handle plurals
        marc_code = inverse_marc_rels.get(contributor_type, 'unk')
        if contributor_type in marc_rels.values():
            #single value
            contributors.append((marc_code, contributor))
        else:
            #list
            for each_contributor in contributor:
                contributors.append((marc_code, each_contributor))

    for (marc_code, contributor) in contributors:
        sortname = contributor.get('agent_sortname', '')
        if not sortname:
            sortname = reverse_name(contributor.get('agent_name', ''))
        record.add_ordered_field(
            pymarc.Field(tag='700',
                         indicators=['1', ' '],
                         subfields=[
                             'a',
                             sortname,
                             'e',
                             marc_rels[marc_code].replace('_', ' ') + '.',
                             '4',
                             marc_code,
                         ]))

    # add subfield to 245 indicating format
    record.add_ordered_field(
        pymarc.Field(tag='245',
                     indicators=['1', '0'],
                     subfields=[
                         'a',
                         pandata.title,
                         'a',
                         '[electronic resource]',
                     ]))

    # publisher, date
    if pandata.publisher:
        field260 = pymarc.Field(tag='260',
                                indicators=[' ', ' '],
                                subfields=[
                                    'b',
                                    pandata.publisher,
                                ])
        if publication_date:
            field260.add_subfield('c', unicodestr(publication_date))
        record.add_ordered_field(field260)

    if pandata.description:
        #add 520 field (description)
        field520 = pymarc.Field(tag='520',
                                indicators=[' ', ' '],
                                subfields=[
                                    'a',
                                    pandata.description,
                                ])
        record.add_ordered_field(field520)

    # subjects
    if pandata.subjects:
        for subject in pandata.subjects:
            if isinstance(subject, tuple):
                (authority, heading) = subject
            elif isinstance(subject, str):
                (authority, heading) = ('', subject)
            else:
                continue
            if authority == 'lcsh':
                subjectfield = pymarc.Field(
                    tag='650',
                    indicators=['0', '0'],
                )
                subjectfield.add_subfield('a', heading)
            elif authority == 'lcc':
                subjectfield = pymarc.Field(
                    tag='050',
                    indicators=['0', '0'],
                )
                subjectfield.add_subfield('a', heading)
            elif authority == '':  #uncontrolled term
                subjectfield = pymarc.Field(
                    tag='653',
                    indicators=['0', '0'],
                )
                subjectfield.add_subfield('a', heading)
            else:
                subjectfield = None
            if subjectfield:
                record.add_ordered_field(subjectfield)
    add_license(record, pandata)

    return record
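
As a quick check of the fixed-length 008 field assembled above, the following self-contained sketch uses a made-up run date and issued year:

from datetime import datetime

# Hypothetical values: a record generated on 2024-01-15 for an edition
# whose gutenberg_issued year is 2006.
now = datetime(2024, 1, 15)
new_field_value = now.strftime('%y%m%d') + 's' + '2006'
new_field_value += '||||xx |||||o|||||||||||eng||'

print(new_field_value)       # 240115s2006||||xx |||||o|||||||||||eng||
print(len(new_field_value))  # 40 -- the full length of a MARC 21 bibliographic 008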
Example #17
 def __unicode__(self):
     return u'{%s} (%s) %s' % (unicodestr(self.questionset), self.number, self.text)
Example #18
    def __init__read(self):
        """
        Constructor to initialize the zipfile in read-only mode

        """
        try:
            # Read the container
            f = self.read("META-INF/container.xml")
        except KeyError:
            # By specification, there MUST be a container.xml in EPUB
            logger.warning("The %s file is not a valid OCF." % unicodestr(self.filename))
            raise InvalidEpub
        try:
            # There MUST be a full path attribute on first grandchild...
            self.opf_path = ET.fromstring(f)[0][0].get("full-path")
        except IndexError:
            #  ...else the file is invalid.
            logger.warning("The %s file is not a valid OCF." % unicodestr(self.filename))
            raise InvalidEpub

        # NEW: json-able info tree
        self.info = {"metadata": {},
                     "manifest": [],
                     "spine": [],
                     "guide": []}

        self.root_folder = os.path.dirname(self.opf_path)   # Used to compose absolute paths for reading in zip archive
        self.opf = ET.fromstring(self.read(self.opf_path))  # OPF tree

        ns = re.compile(r'\{.*?\}')  # RE to strip {namespace} mess

        # Iterate over <metadata> section, fill EPUB.info["metadata"] dictionary
        for i in self.opf.find("{0}metadata".format(NAMESPACE["opf"])):
            if i.tag and isinstance(i.tag, unicodestr):
                tag = ns.sub('', i.tag)
                if tag not in self.info["metadata"]:
                    self.info["metadata"][tag] = i.text or i.attrib
                else:
                    self.info["metadata"][tag] = [self.info["metadata"][tag], i.text or i.attrib]

        # Get id of the cover in <meta name="cover" />
        try:
            coverid = self.opf.find('.//{0}meta[@name="cover"]'.format(NAMESPACE["opf"])).get("content")
        except AttributeError:
            # It's a facultative field, after all
            coverid = None
        self.cover = coverid  # This is the manifest ID of the cover

        self.info["manifest"] = [{"id": x.get("id"),                # Build a list of manifest items
                                  "href": x.get("href"),
                                  "mimetype": x.get("media-type")}
                                 for x in self.opf.find("{0}manifest".format(NAMESPACE["opf"])) if x.get("id")]

        self.info["spine"] = [{"idref": x.get("idref")}             # Build a list of spine items
                              for x in self.opf.find("{0}spine".format(NAMESPACE["opf"])) if x.get("idref")]
        try:
            self.info["guide"] = [{"href": x.get("href"),           # Build a list of guide items
                                   "type": x.get("type"),
                                   "title": x.get("title")}
                                  for x in self.opf.find("{0}guide".format(NAMESPACE["opf"])) if x.get("href")]
        except TypeError:                                           # The guide element is optional
            self.info["guide"] = None

        # Document identifier
        try:
            self.id = self.opf.find('.//{0}identifier[@id="{1}"]'.format(NAMESPACE["dc"],
                                                                         self.opf.get("unique-identifier"))).text
        except AttributeError:
            raise InvalidEpub("Cannot process an EPUB without unique-identifier attribute of the package element")
        # Get and parse the TOC
        toc_id = self.opf[2].get("toc")
        if toc_id:
            expr = ".//{0}item[@id='{1:s}']".format(NAMESPACE["opf"], toc_id)
        else:
            expr = ".//{0}item[@properties='nav']".format(NAMESPACE["opf"])
        toc_name = self.opf.find(expr).get("href")    
        self.ncx_path = os.path.join(self.root_folder, toc_name)
        self.ncx = ET.fromstring(self.read(self.ncx_path))
        self.contents = [{"name": i[0][0].text or "None",           # Build a list of toc elements
                          "src": os.path.join(self.root_folder, i[1].get("src")),
                          "id":i.get("id")}
                         for i in self.ncx.iter("{0}navPoint".format(NAMESPACE["ncx"]))]    # The iter method