def info(self):
    """Return the parsed ``PictureInfo`` for this object, caching the result.

    The first call parses ``self.xml_file.path`` with ``dcparser.parse``
    (using ``picture.PictureInfo`` as the info class) and stores the result
    on ``self._info``. Later calls return the stored object without parsing
    again.
    """
    if hasattr(self, '_info'):
        return self._info

    # librarian is imported only when a parse is actually required.
    from librarian import dcparser, picture

    self._info = dcparser.parse(self.xml_file.path, picture.PictureInfo)
    return self._info
def from_xml_file(cls, xml_file, **kwargs):
    """Create an object from an XML file via ``cls.from_text_and_meta``.

    ``xml_file`` may be a ``File`` instance or anything ``open()`` accepts.
    The metadata is parsed from the argument as given; a non-``File``
    argument is then opened and wrapped in ``File``. Extra keyword arguments
    are forwarded to ``cls.from_text_and_meta``.

    The file object is closed before returning, whether or not it was
    opened here.
    """
    # use librarian to parse meta-data
    meta = dcparser.parse(xml_file)

    source = xml_file if isinstance(xml_file, File) else File(open(xml_file))
    try:
        return cls.from_text_and_meta(source, meta, **kwargs)
    finally:
        source.close()
def from_xml_file(cls, xml_file, **kwargs):
    """Create an object from an XML file via ``cls.from_text_and_meta``.

    ``xml_file`` is either a Django ``File`` or a value accepted by
    ``open()``. Metadata is parsed with ``dcparser.parse`` from the argument
    as passed in. If the argument is not already a ``File``, it is opened
    and wrapped in one. Remaining keyword arguments are passed through to
    ``cls.from_text_and_meta``.

    The file object is always closed on the way out, including one supplied
    by the caller.
    """
    from django.core.files import File
    from librarian import dcparser

    # use librarian to parse meta-data
    parsed_meta = dcparser.parse(xml_file)

    if isinstance(xml_file, File):
        handle = xml_file
    else:
        handle = File(open(xml_file))

    try:
        return cls.from_text_and_meta(handle, parsed_meta, **kwargs)
    finally:
        handle.close()
def extract_metadata(self, book, book_info=None, dc_only=None):
    """
    Extract metadata from book and return a map of fields keyed by fieldname.

    book: object providing ``slug``, ``tags`` (items with ``name``) and
        ``xml_file.path``.
    book_info: already parsed metadata; when ``None`` it is parsed from
        ``book.xml_file.path`` with ``dcparser.parse``.
    dc_only: optional collection of field names; when truthy, only the
        ``dcparser.BookInfo.FIELDS`` entries named in it are extracted.

    Returns a dict that always holds "slug", "tags", "is_book" and
    "published_date", plus one entry for each non-empty field whose
    validator is ``as_unicode``, ``as_person`` or ``as_date``.
    """
    fields = {}

    if book_info is None:
        # Close the XML file once parsing is done rather than leaking the
        # open handle.
        with open(book.xml_file.path) as xml_file:
            book_info = dcparser.parse(xml_file)

    fields["slug"] = book.slug
    fields["tags"] = [t.name for t in book.tags]
    fields["is_book"] = True

    # validator, name
    for field in dcparser.BookInfo.FIELDS:
        if dc_only and field.name not in dc_only:
            continue

        # Missing and empty attributes are both skipped.
        value = getattr(book_info, field.name, None)
        if not value:
            continue

        # since no type information is available, we use validator
        type_indicator = field.validator
        if type_indicator == dcparser.as_unicode:
            fields[field.name] = ", ".join(value) if field.multiple else value
        elif type_indicator == dcparser.as_person:
            if isinstance(value, dcparser.Person):
                fields[field.name] = unicode(value)
            else:
                fields[field.name] = ", ".join(map(unicode, value))
        elif type_indicator == dcparser.as_date:
            fields[field.name] = value

    # get published date: first group of published_date_re matched against
    # the source name, or "" when there is no source name or no match
    pd = ""
    source_name = getattr(book_info, "source_name", None)
    if source_name:
        match = self.published_date_re.search(source_name)
        if match is not None:
            pd = str(match.groups()[0])
    fields["published_date"] = pd

    return fields
def extract_metadata(self, book, book_info=None, dc_only=None):
    """
    Extract metadata from book and return a map of fields keyed by fieldname.

    book: object providing ``slug``, ``tags`` (items with ``name``) and
        ``xml_file.path``.
    book_info: already parsed metadata; when ``None`` it is parsed from
        ``book.xml_file.path`` with ``dcparser.parse``.
    dc_only: optional collection of field names; when truthy, only the
        ``dcparser.BookInfo.FIELDS`` entries named in it are extracted.

    Returns a dict that always holds 'slug', 'tags', 'is_book' and
    'published_date', plus one entry for each non-empty field whose
    validator is ``as_unicode``, ``as_person`` or ``as_date``.
    """
    fields = {}

    if book_info is None:
        # Use a context manager so the XML file handle is released as soon
        # as parsing finishes instead of being left open.
        with open(book.xml_file.path) as xml_file:
            book_info = dcparser.parse(xml_file)

    fields['slug'] = book.slug
    fields['tags'] = [t.name for t in book.tags]
    fields['is_book'] = True

    # validator, name
    for field in dcparser.BookInfo.FIELDS:
        if dc_only and field.name not in dc_only:
            continue

        # A missing attribute is treated like an empty one: skip it.
        value = getattr(book_info, field.name, None)
        if not value:
            continue

        # since no type information is available, we use validator
        type_indicator = field.validator
        if type_indicator == dcparser.as_unicode:
            if field.multiple:
                value = ', '.join(value)
            fields[field.name] = value
        elif type_indicator == dcparser.as_person:
            if isinstance(value, dcparser.Person):
                persons = unicode(value)
            else:
                persons = ', '.join(map(unicode, value))
            fields[field.name] = persons
        elif type_indicator == dcparser.as_date:
            fields[field.name] = value

    # get published date: first group of published_date_re matched against
    # the source name, or "" when there is no source name or no match
    pd = ""
    source_name = getattr(book_info, 'source_name', None)
    if source_name:
        match = self.published_date_re.search(source_name)
        if match is not None:
            pd = str(match.groups()[0])
    fields["published_date"] = pd

    return fields
def rebuild_extra_info(apps, schema_editor):
    """Rebuild ``extra_info``, ``areas_json`` and the HTML file of every Picture.

    Written with the ``(apps, schema_editor)`` signature; ``schema_editor``
    is not used. For each ``picture.Picture`` row:

    * ``extra_info`` is replaced with ``to_dict()`` of the ``PictureInfo``
      parsed from ``xml_file.path``;
    * the 'object' value of every entry under ``areas_json['things']`` is
      capitalized;
    * 'picture/picture_info.html' is rendered with the 'things' and
      'themes' from ``areas_json`` and stored as ``<slug>.html`` in
      ``html_file``.
    """
    Picture = apps.get_model("picture", "Picture")
    from librarian.picture import PictureInfo
    from librarian import dcparser

    for pic in Picture.objects.all():
        info = dcparser.parse(pic.xml_file.path, PictureInfo)
        pic.extra_info = info.to_dict()

        areas_json = pic.areas_json
        for thing in areas_json[u'things'].values():
            thing[u'object'] = thing[u'object'].capitalize()
        pic.areas_json = areas_json

        html_text = unicode(render_to_string('picture/picture_info.html', {
            'things': pic.areas_json['things'],
            'themes': pic.areas_json['themes'],
        }))
        # save=False: the file is stored and the field updated, but the
        # model is not saved here; the single pic.save() below persists all
        # changes at once instead of writing the row twice.
        pic.html_file.save(
            "%s.html" % pic.slug, ContentFile(html_text), save=False)
        pic.save()
def check_load(xml_file):
    """Assert that ``xml_file`` parses into a ``picture.PictureInfo`` instance."""
    parsed = dcparser.parse(xml_file, picture.PictureInfo)
    assert parsed is not None
    assert isinstance(parsed, picture.PictureInfo)