@property
def _edition(self):
    # consumed as an attribute (see set_edition_id), hence a property
    if self.metadata.get("_edition", ''):
        return unicodestr(self.metadata["_edition"])
    elif self.get_one_identifier('isbn'):
        # use the first isbn if available
        return unicodestr(self.get_one_identifier('isbn'))
    elif self._repo:
        return edition_name_from_repo(self._repo)
    else:
        return 'book'  # this will be the default file name

def set_edition_id(self):
    # set a (hopefully globally unique) edition identifier
    if 'edition_identifiers' not in self.metadata:
        self.metadata['edition_identifiers'] = {}
    base = self.url
    if not base:
        try:
            # fall back to the first identifier pair, e.g. 'isbn:...'
            key, value = list(self.identifiers.items())[0]
            base = unicodestr(key) + ':' + unicodestr(value)
        except IndexError:
            base = u'repo:' + unicodestr(self._repo)
    self.metadata['edition_identifiers']['edition_id'] = base + '#' + self._edition

def htm_modified(file_path):
    g = rdflib.Graph()
    try:
        g.load(file_path)
    except IOError:
        return None
    ld = serializer.from_rdf(g, context_data=context, base=None,
                             use_native_types=False, use_rdf_type=False,
                             auto_compact=False, startnode=None, index=False)
    graph = ld['@graph']
    # gather the properties of blank nodes, keyed by node id
    nodes = {}
    for obj in graph:
        if isinstance(obj, dict):
            obj = obj.copy()
            if "@id" in obj and obj["@id"].startswith("_"):
                nodeid = obj["@id"]
                node = nodes.get(nodeid, {})
                del obj["@id"]
                node.update(obj)
                nodes[nodeid] = node
    # find the .htm file node and return its modification date
    for obj in unblank_node(graph, nodes):
        try:
            if unicodestr(obj[u'@id']).endswith('.htm'):
                return obj[u'dcterms:modified'][u'@value']
        except (KeyError, TypeError):
            pass

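# A hedged usage sketch: htm_modified returns the dcterms:modified timestamp
# string of the first *.htm file node in a Project Gutenberg RDF file, or
# None if the file cannot be read.
#
# modified = htm_modified('cache/epub/1/pg1.rdf')
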
def get_url(key, val, entities=None):
    # unwrap a JSON-LD {'@id': url} reference; recurse into lists
    if isinstance(val, list):
        return (key, [get_url(key, item, entities=entities)[1] for item in val])
    try:
        return (key, unicodestr(val['@id']))
    except KeyError:
        return None

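# A minimal usage sketch (illustrative values):
#
# get_url('about', {'@id': 'http://www.gutenberg.org/ebooks/1'})
#   -> ('about', 'http://www.gutenberg.org/ebooks/1')
# get_url('about', [{'@id': 'http://a.example/'}, {'@id': 'http://b.example/'}])
#   -> ('about', ['http://a.example/', 'http://b.example/'])
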
def strip_controls(_string):
    out = []
    _string = unicodestr(_string)
    for ch in _string:
        if unicodedata.category(ch)[0] != 'C':  # not a control character
            out.append(ch)
        elif ch in u'\r\n\t':  # allow whitespace
            out.append(ch)
    return u''.join(out)

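# A minimal usage sketch: category-C code points (controls, formats,
# surrogates) are dropped, but \r, \n and \t survive.
#
# strip_controls(u'abc\x00def\n')  ->  u'abcdef\n'
# strip_controls(u'a\u200bb')      ->  u'ab'   (zero-width space is Cf)
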
def pg_rdf_to_json(file_path):
    g = rdflib.Graph()
    g.load(file_path)
    ld = serializer.from_rdf(g, context_data=context, base=None,
                             use_native_types=False, use_rdf_type=False,
                             auto_compact=False, startnode=None, index=False)
    graph = ld['@graph']
    # gather the properties of blank nodes, keyed by node id
    nodes = {}
    for obj in graph:
        if isinstance(obj, dict):
            obj = obj.copy()
            if "@id" in obj and obj["@id"].startswith("_"):
                nodeid = obj["@id"]
                node = nodes.get(nodeid, {})
                del obj["@id"]
                node.update(obj)
                nodes[nodeid] = node
    # now remove the blank nodes and the files
    newnodes = []
    top = None
    for obj in unblank_node(graph, nodes):
        try:
            if obj['@type'] == 'pgterms:file':
                continue
            elif obj['@type'] == 'pgterms:ebook':
                top = obj
            elif '@id' in obj and unicodestr(obj['@id']) == 'http://www.gutenberg.org/':
                continue
            else:
                newnodes.append(obj)
        except KeyError:
            continue
    entities = {}
    for node in newnodes:
        node_id = node.get('@id', None)
        if node_id:
            entities[node_id] = mapdata(node, pandata_map, entities)
    for adder in pandata_adders:
        adder(top, entities)
    top2 = mapdata(top, pandata_map, entities)
    for postprocessor in postprocessors:
        postprocessor(top2)
    return top2

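# A hedged usage sketch, assuming a local copy of the PG RDF library laid out
# as cache/epub/<id>/pg<id>.rdf (the same layout get_repos_to_upload walks):
#
# pandata = pg_rdf_to_json('cache/epub/1/pg1.rdf')
# print(json.dumps(pandata, indent=2, sort_keys=True))
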
def __init__(self, path, threaded=True):
    LockBase.__init__(self, path, threaded)
    self.lock_file = unicodestr(self.lock_file)
    self.unique_name = unicodestr(self.unique_name)

    import sqlite3
    self.connection = sqlite3.connect(SQLiteFileLock.testdb)

    c = self.connection.cursor()
    try:
        c.execute("create table locks"
                  "("
                  "   lock_file varchar(32),"
                  "   unique_name varchar(32)"
                  ")")
    except sqlite3.OperationalError:
        pass  # the table already exists
    else:
        self.connection.commit()
        import atexit
        atexit.register(os.unlink, SQLiteFileLock.testdb)

def asciify(_title):
    _title = unicodedata.normalize('NFD', unicodestr(_title))
    ascii = True
    out = []
    ok = u"1234567890qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM- ',"
    for ch in _title:
        if ch in ok:
            out.append(ch)
        elif unicodedata.category(ch)[0] == 'L':  # a letter
            out.append(hex(ord(ch)))
            ascii = False
        elif ch in u'\r\n\t':
            out.append(u'-')
    return (ascii, sub("[ ',-]+", '-', "".join(out)))

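# A minimal usage sketch: NFD normalization splits off most diacritics as
# combining marks (category Mn), which are silently dropped; remaining
# non-ASCII letters are hex-escaped and the result is flagged as non-ascii.
#
# asciify(u'Vojtěch')  ->  (True, 'Vojtech')
# asciify(u'道')       ->  (False, '0x9053')
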
def get_repos_to_upload(self):
    # scan forward from the last known PG id until we hit a gap
    # that isn't a known missing id
    pg_id = int(last_pgid) + 1
    more = True
    pg_ids = []
    while more:
        new_rdffile = os.path.join(self.rdf_library_dir, 'cache', 'epub',
                                   unicodestr(pg_id), 'pg{}.rdf'.format(pg_id))
        if os.path.exists(new_rdffile):
            pg_ids.append(pg_id)
            pg_id += 1
        else:
            if pg_id in missing_pgid:
                pg_id += 1
            else:
                more = False
    return pg_ids

def _update_tags(self, runinfo):
    if not runinfo:
        return
    tags_to_add = []
    for choice in self.question.choices():
        tags = choice.tags
        if not tags:
            continue
        tags = tags.split(',')
        runinfo.remove_tags(tags)
        for split_answer in self.split_answer():
            if unicodestr(split_answer) == choice.value:
                tags_to_add.extend(tags)
    runinfo.add_tags(tags_to_add)
    runinfo.save()

def test_new_epub(self):
    self.randomf = '%012x.epub' % random.randrange(16**12)  # random name
    epub = EPUB(self.randomf, mode='w')
    epub.addmetadata('test', 'GOOD')
    uxml = u'<?xml version="1.0" encoding="utf-8" standalone="yes"?><test>VojtěchVojtíšek</test>'
    part = StringIO(unicodestr(uxml))
    epub.addpart(part, "testpart.xhtml", "application/xhtml+xml", 2)
    epub.close()
    epub = EPUB(self.randomf, mode='r')
    self.assertEqual(len(epub.opf), 4)     # opf length
    self.assertEqual(len(epub.opf[0]), 6)  # metadata
    self.assertEqual(len(epub.opf[1]), 2)  # manifest
    self.assertEqual(len(epub.opf[2]), 1)  # spine
    self.assertEqual(len(epub.opf[3]), 0)  # guide
    with open(self.randomf, 'r+b') as epub_from_file:
        epub = EPUB(epub_from_file, mode='a')
        epub.addpart(part, "testpart2.xhtml", "application/xhtml+xml", 3)
        epub.close()
    epub = EPUB(self.randomf, mode='a')
    epub.addmetadata('test2', 'GOOD')
    epub.writetodisk('testwrite.epub')

def additem(self, fileObject, href, mediatype):
    """
    Add a file to manifest only

    :type fileObject: BytesIO
    :param fileObject:
    :type href: unicodestr
    :param href:
    :type mediatype: unicodestr
    :param mediatype:
    """
    assert self.epub_mode != "r", "%s is not writable" % self
    element = ET.Element(
        NAMESPACE.get("opf") + "item",
        attrib={"id": "id_" + unicodestr(uuid.uuid4())[:5],
                "href": href,
                "media-type": mediatype}
    )
    try:
        self._writestr(os.path.join(self.root_folder, element.attrib["href"]),
                       fileObject.getvalue().encode('utf-8'))
    except AttributeError:
        self._writestr(os.path.join(self.root_folder, element.attrib["href"]),
                       fileObject)
    self.opf[1].append(element)
    return element.attrib["id"]

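# A minimal usage sketch, assuming an EPUB opened in a writable mode; the
# getvalue().encode(...) path above expects a text buffer such as StringIO:
#
# css = StringIO(u'body { margin: 0 }')
# item_id = epub.additem(css, 'style.css', 'text/css')
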
def question_timeperiod(request, question):
    cd = question.getcheckdict()
    if "units" in cd:
        units = cd["units"].split(',')
    else:
        units = ["day", "week", "month", "year"]
    timeperiods = []
    if not units:
        units = ["day", "week", "month", "year"]
    key1 = "question_%s" % question.number
    key2 = "question_%s_unit" % question.number
    value = request.POST.get(key1, '')
    unitselected = request.POST.get(key2, units[0])
    for x in units:
        if x in perioddict:
            timeperiods.append((x, unicodestr(perioddict[x]), unitselected == x))
    return {
        "required": "required" in cd,
        "timeperiods": timeperiods,
        "value": value,
    }

def get_id(key, val, entities=None):
    return (key, unicodestr(val))

def send_now(users, label, extra_context=None, on_site=True, sender=None):
    """
    Creates a new notice.

    This is intended to be how other apps create new notices.

    notification.send(user, "friends_invite_sent", {
        "spam": "eggs",
        "foo": "bar",
    })

    You can pass in on_site=False to prevent the notice emitted from being
    displayed on the site.
    """
    if extra_context is None:
        extra_context = {}

    notice_type = NoticeType.objects.get(label=label)

    protocol = getattr(settings, "DEFAULT_HTTP_PROTOCOL", "http")
    current_site = Site.objects.get_current()

    notices_url = u"%s://%s%s" % (
        protocol,
        unicodestr(current_site),
        reverse("notification_notices"),
    )

    current_language = get_language()

    formats = (
        "short.txt",
        "full.txt",
        "notice.html",
        "full.html",
    )  # TODO make formats configurable

    for user in users:
        recipients = []
        # get user language for user from language store defined in
        # NOTIFICATION_LANGUAGE_MODULE setting
        try:
            language = get_notification_language(user)
        except LanguageStoreNotAvailable:
            language = None

        if language is not None:
            # activate the user's language
            activate(language)

        # update context with user specific translations
        context = {
            "recipient": user,
            "sender": sender,
            "notice": ugettext(notice_type.display),
            "notices_url": notices_url,
            "current_site": current_site,
        }
        context.update(extra_context)

        # get prerendered format messages
        messages = get_formatted_messages(formats, label, context)

        context['message'] = messages["short.txt"]
        subject = render_to_string("notification/email_subject.txt", context)
        # strip newlines from subject
        subject = "".join(subject.splitlines())

        context['message'] = messages["full.txt"]
        body = render_to_string("notification/email_body.txt", context)

        notice = Notice.objects.create(recipient=user,
                                       message=messages["notice.html"],
                                       notice_type=notice_type,
                                       on_site=on_site, sender=sender)
        if should_send(user, notice_type, "1") and user.email and user.is_active:
            # Email
            recipients.append(user.email)
            send_mail(subject, body, settings.DEFAULT_FROM_EMAIL, recipients)

    # reset environment to original language
    activate(current_language)

def stub(pandata):
    record = pymarc.Record(force_utf8=True)
    now = datetime.now()
    # mostly to identify this record as a 'stub'
    record.add_ordered_field(
        pymarc.Field(tag='001', data='stb' + now.strftime('%y%m%d%H%M%S')))
    add_stuff(record)

    # fun fun fun 008
    new_field_value = now.strftime('%y%m%d') + 's'
    # library cataloging will consider "Project Gutenberg" to be the
    # publisher of the edition
    publication_date = pandata.gutenberg_issued
    if publication_date and len(publication_date) > 3:  # must be at least a year
        new_field_value += publication_date[0:4]
    else:
        new_field_value += '||||'
    new_field_value += '||||xx |||||o|||||||||||eng||'
    record.add_ordered_field(pymarc.Field(tag='008', data=new_field_value))

    identifiers = pandata.identifiers

    # add ISBNs if available
    isbn = identifiers.get('isbn', None)  # most isbns in PG are really for related editions
    if isbn:
        record.add_ordered_field(
            pymarc.Field(tag='020', indicators=[' ', ' '],
                         subfields=['a', isbn]))
    related = identifiers.get('isbns_related', [])
    for isbn in related:
        record.add_ordered_field(
            pymarc.Field(tag='020', indicators=[' ', ' '],
                         subfields=['a', isbn + ' (related)']))

    # OCLC number
    oclc = identifiers.get('oclc', None)
    if oclc:
        record.add_ordered_field(
            pymarc.Field(tag='035', indicators=[' ', ' '],
                         subfields=['a', '(OCoLC)' + str(oclc)]))

    # contributors
    # use marc codes from http://www.loc.gov/marc/relators/relaterm.html
    creators = []
    # heuristically decide the "main entry", the first creator
    for marc_type in main_entries:
        creator = pandata.creator.get(marc_rels.get(marc_type), None)
        if creator:
            creators.append((marc_type, creator))
        else:
            creator = pandata.creator.get(marc_rels.get(plural(marc_type)), [])
            for each_creator in creator:
                creators.append((marc_type, each_creator))
    if creators:
        (marc_code, creator) = creators[0]
        sortname = creator.get('agent_sortname', '')
        if not sortname:
            sortname = reverse_name(creator.get('agent_name', ''))
        record.add_ordered_field(
            pymarc.Field(tag='100', indicators=['1', ' '],
                         subfields=[
                             'a', sortname,
                             '4', marc_code,
                         ]))

    # language
    if pandata.language:
        is_translation = '1' if pandata.translators else '0'
        record.add_ordered_field(
            pymarc.Field(tag='041', indicators=[is_translation, 'iso639-1'],
                         subfields=['a', pandata.language]))

    contributors = creators[1:] if creators else []
    for contributor_type in pandata.contributor.keys():
        contributor = pandata.contributor[contributor_type]
        # handle plurals
        marc_code = inverse_marc_rels.get(contributor_type, 'unk')
        if contributor_type in marc_rels.values():  # single value
            contributors.append((marc_code, contributor))
        else:  # list
            for each_contributor in contributor:
                contributors.append((marc_code, each_contributor))
    for (marc_code, contributor) in contributors:
        sortname = contributor.get('agent_sortname', '')
        if not sortname:
            sortname = reverse_name(contributor.get('agent_name', ''))
        record.add_ordered_field(
            pymarc.Field(tag='700', indicators=['1', ' '],
                         subfields=[
                             'a', sortname,
                             'e', marc_rels[marc_code].replace('_', ' ') + '.',
                             '4', marc_code,
                         ]))

    # add subfield to 245 indicating format
    record.add_ordered_field(
        pymarc.Field(tag='245', indicators=['1', '0'],
                     subfields=[
                         'a', pandata.title,
                         'a', '[electronic resource]',
                     ]))

    # publisher, date
    if pandata.publisher:
        field260 = pymarc.Field(tag='260', indicators=[' ', ' '],
                                subfields=['b', pandata.publisher])
        if publication_date:
            field260.add_subfield('c', unicodestr(publication_date))
        record.add_ordered_field(field260)

    if pandata.description:
        # add 520 field (description)
        field520 = pymarc.Field(tag='520', indicators=[' ', ' '],
                                subfields=['a', pandata.description])
        record.add_ordered_field(field520)

    # subjects
    if pandata.subjects:
        for subject in pandata.subjects:
            if isinstance(subject, tuple):
                (authority, heading) = subject
            elif isinstance(subject, str):
                (authority, heading) = ('', subject)
            else:
                continue
            if authority == 'lcsh':
                subjectfield = pymarc.Field(tag='650', indicators=['0', '0'])
                subjectfield.add_subfield('a', heading)
            elif authority == 'lcc':
                subjectfield = pymarc.Field(tag='050', indicators=['0', '0'])
                subjectfield.add_subfield('a', heading)
            elif authority == '':  # uncontrolled term
                subjectfield = pymarc.Field(tag='653', indicators=['0', '0'])
                subjectfield.add_subfield('a', heading)
            else:
                subjectfield = None
            if subjectfield:
                record.add_ordered_field(subjectfield)

    add_license(record, pandata)
    return record

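# A hedged usage sketch, assuming `pandata` exposes the attributes used above
# (title, identifiers, creator, contributor, ...); as_marc() is the standard
# pymarc serialization:
#
# record = stub(pandata)
# with open('stub.mrc', 'wb') as marcfile:
#     marcfile.write(record.as_marc())
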
def __unicode__(self):
    return u'{%s} (%s) %s' % (unicodestr(self.questionset), self.number, self.text)

def __init__read(self):
    """
    Constructor to initialize the zipfile in read-only mode
    """
    try:
        # Read the container
        f = self.read("META-INF/container.xml")
    except KeyError:
        # By specification, there MUST be a container.xml in EPUB
        logger.warning("The %s file is not a valid OCF." % unicodestr(self.filename))
        raise InvalidEpub
    try:
        # There MUST be a full path attribute on first grandchild...
        self.opf_path = ET.fromstring(f)[0][0].get("full-path")
    except IndexError:
        # ...else the file is invalid.
        logger.warning("The %s file is not a valid OCF." % unicodestr(self.filename))
        raise InvalidEpub

    # NEW: json-able info tree
    self.info = {"metadata": {}, "manifest": [], "spine": [], "guide": []}

    self.root_folder = os.path.dirname(self.opf_path)  # Used to compose absolute paths for reading in zip archive
    self.opf = ET.fromstring(self.read(self.opf_path))  # OPF tree

    ns = re.compile(r'\{.*?\}')  # RE to strip {namespace} mess

    # Iterate over <metadata> section, fill EPUB.info["metadata"] dictionary
    for i in self.opf.find("{0}metadata".format(NAMESPACE["opf"])):
        if i.tag and isinstance(i.tag, unicodestr):
            tag = ns.sub('', i.tag)
            if tag not in self.info["metadata"]:
                self.info["metadata"][tag] = i.text or i.attrib
            else:
                self.info["metadata"][tag] = [self.info["metadata"][tag],
                                              i.text or i.attrib]

    # Get id of the cover in <meta name="cover" />
    try:
        coverid = self.opf.find('.//{0}meta[@name="cover"]'.format(NAMESPACE["opf"])).get("content")
    except AttributeError:
        # It's a facultative field, after all
        coverid = None
    self.cover = coverid  # This is the manifest ID of the cover

    self.info["manifest"] = [{"id": x.get("id"),  # Build a list of manifest items
                              "href": x.get("href"),
                              "mimetype": x.get("media-type")}
                             for x in self.opf.find("{0}manifest".format(NAMESPACE["opf"]))
                             if x.get("id")]

    self.info["spine"] = [{"idref": x.get("idref")}  # Build a list of spine items
                          for x in self.opf.find("{0}spine".format(NAMESPACE["opf"]))
                          if x.get("idref")]

    try:
        self.info["guide"] = [{"href": x.get("href"),  # Build a list of guide items
                               "type": x.get("type"),
                               "title": x.get("title")}
                              for x in self.opf.find("{0}guide".format(NAMESPACE["opf"]))
                              if x.get("href")]
    except TypeError:  # The guide element is optional
        self.info["guide"] = None

    # Document identifier
    try:
        self.id = self.opf.find('.//{0}identifier[@id="{1}"]'.format(
            NAMESPACE["dc"], self.opf.get("unique-identifier"))).text
    except AttributeError:
        raise InvalidEpub("Cannot process an EPUB without unique-identifier "
                          "attribute of the package element")

    # Get and parse the TOC
    toc_id = self.opf[2].get("toc")
    if toc_id:
        expr = ".//{0}item[@id='{1:s}']".format(NAMESPACE["opf"], toc_id)
    else:
        expr = ".//{0}item[@properties='nav']".format(NAMESPACE["opf"])
    toc_name = self.opf.find(expr).get("href")
    self.ncx_path = os.path.join(self.root_folder, toc_name)
    self.ncx = ET.fromstring(self.read(self.ncx_path))
    self.contents = [{"name": i[0][0].text or "None",  # Build a list of toc elements
                      "src": os.path.join(self.root_folder, i[1].get("src")),
                      "id": i.get("id")}
                     for i in self.ncx.iter("{0}navPoint".format(NAMESPACE["ncx"]))]  # The iter method