Beispiel #1
0
 def handle(self, **options):
     """Rewrite the language of every work matching neither language regex.

     Works whose ``language`` matches neither ``iso639`` nor
     ``lang_and_locale`` are counted, reported on stdout, and then saved
     one by one with the value returned by ``lang_to_language_code``;
     'xx' is stored when that value is falsy.
     """
     unfixed = Work.objects.exclude(language__regex=iso639).exclude(
         language__regex=lang_and_locale
     )
     self.stdout.write('{} works to fix'.format(unfixed.count()))
     for item in unfixed:
         # 'xx' is the placeholder used when no code could be derived
         item.language = lang_to_language_code(item.language) or 'xx'
         item.save()
Beispiel #2
0
 def get_language(self):
     """Set 'language' from the document's 'Language' label when possible.

     Looks up the string 'Language' in ``self.doc``, takes the next sibling
     of the label's grandparent, and converts its text with
     ``lang_to_language_code``.  If any step yields nothing, the parent
     class implementation is used instead.
     """
     code = ''
     label = self.doc.find(string='Language')
     if label:
         value_node = label.parent.parent.find_next_sibling()
         if value_node:
             text = value_node.get_text()
             if text:
                 code = lang_to_language_code(text)

     if not code:
         # nothing usable found on the page; defer to the generic scraper
         super(UbiquityScraper, self).get_language()
         return
     self.set('language', code)
Beispiel #3
0
    def load_from_pandata(self, metadata, work=None):
        ''' Find or create the work and edition described by metadata.

        metadata is a Pandata object; work is an optional existing work to
        attach the metadata to.

        Each identifier listed in IDTABLE is looked up.  If an existing
        Identifier is found, its work (and edition, if any) is used; when a
        different work was already selected, the two works are merged.
        Identifiers not yet stored are created once a work and edition exist.
        Publisher, publication date, description, authors, subjects and the
        cover image are then copied from the metadata onto the work/edition,
        and both are saved.

        Returns the edition, or None when no work could be found and the
        metadata has no title to create one from.
        '''

        # find a work to associate
        edition = None
        has_ed_id = False
        if metadata.url:
            new_ids = [('http', 'http', metadata.url)]
        else:
            new_ids = []
        for (identifier, id_code) in IDTABLE:
            # note that the work chosen is the last associated
            value = metadata.edition_identifiers.get(identifier, None)
            value = identifier_cleaner(id_code)(value)
            if not value:
                value = metadata.identifiers.get(identifier, None)
            if value:
                if id_code not in WORK_IDENTIFIERS:
                    has_ed_id = True
                value = value[0] if isinstance(value, list) else value
                try:
                    ident = models.Identifier.objects.get(type=id_code, value=value)
                    # compare ids by value: `is not` on ints tests object
                    # identity and can be True for two equal ids, which would
                    # send a work to merge_works against itself
                    if work and ident.work and ident.work_id != work.id:
                        # dangerous! merge newer into older
                        if work.id < ident.work_id:
                            work = merge_works(work, ident.work)
                        else:
                            work = merge_works(ident.work, work)
                    else:
                        work = ident.work
                    if ident.edition and not edition:
                        edition = ident.edition
                except models.Identifier.DoesNotExist:
                    if id_code != 'edid' or not has_ed_id:  #last in loop
                        # only need to create edid if there is no edition id for the edition
                        new_ids.append((identifier, id_code, value))

        if not work:
            if metadata.title:
                language = lang_to_language_code(metadata.language)
                work = models.Work.objects.create(title=metadata.title, language=language if language else 'xx')
            else:
                # nothing to attach to and no title to create a work from
                return None
        if not edition:
            if metadata.edition_note:
                (note, created) = models.EditionNote.objects.get_or_create(note=metadata.edition_note)
            else:
                note = None
            edition = models.Edition.objects.create(
                title=metadata.title,
                work=work,
                note=note,
            )
        for (identifier, id_code, value) in new_ids:
            # work-level identifiers are not tied to a particular edition
            models.Identifier.set(
                type=id_code,
                value=value,
                edition=edition if id_code not in WORK_IDENTIFIERS else None,
                work=work,
            )
        if metadata.publisher: #always believe yaml
            edition.set_publisher(metadata.publisher)

        if metadata.publication_date: #always believe yaml
            edition.publication_date = metadata.publication_date

        #be careful about overwriting the work description
        if metadata.description and len(metadata.description) > len(work.description):
            # don't over-write reasonably long descriptions
            if len(work.description) < 500:
                work.description = metadata.description

        # only fill in authors when the edition has none yet
        if metadata.creator and not edition.authors.count():
            edition.authors.clear()
            for key in metadata.creator.keys():
                creators = metadata.creator[key]
                rel_code = inverse_marc_rels.get(key, None)
                if not rel_code:
                    # retry with a trailing 's' stripped from the key;
                    # default to 'auth' when still unknown
                    rel_code = inverse_marc_rels.get(key.rstrip('s'), 'auth')
                creators = creators if isinstance(creators, list) else [creators]
                for creator in creators:
                    edition.add_author(unreverse_name(creator.get('agent_name', '')), relation=rel_code)
        for yaml_subject in metadata.subjects: #always add yaml subjects (don't clear)
            if isinstance(yaml_subject, tuple):
                (authority, heading) = yaml_subject
            elif isinstance(yaml_subject, str) or isinstance(yaml_subject, unicode):
                (authority, heading) = ('', yaml_subject)
            else:
                continue
            models.Subject.set_by_name(heading, work=work, authority=authority)

        # the default edition uses the first cover in covers.
        for cover in metadata.covers:
            if cover.get('image_path', False):
                edition.cover_image = urljoin(self.base_url, cover['image_path'])
                break
            elif cover.get('image_url', False):
                edition.cover_image = cover['image_url']
                break
        work.save()
        edition.save()
        return edition
Beispiel #4
0
def doab_lang_to_iso_639_1(lang):
    """Return ``lang_to_language_code(lang)``, or 'xx' when that is falsy."""
    return lang_to_language_code(lang) or 'xx'