def handle(self, **options):
    """Rewrite the language of works whose value matches neither pattern.

    Selects every Work whose ``language`` matches neither the ``iso639``
    nor the ``lang_and_locale`` regex, reports how many were found on
    ``self.stdout``, then replaces each language with the result of
    ``lang_to_language_code``; when that result is falsy the placeholder
    ``'xx'`` is stored instead.
    """
    unrecognized = (
        Work.objects
        .exclude(language__regex=iso639)
        .exclude(language__regex=lang_and_locale)
    )
    self.stdout.write('{} works to fix'.format(unrecognized.count()))
    for item in unrecognized:
        # fall back to the 'xx' placeholder when no code can be derived
        item.language = lang_to_language_code(item.language) or 'xx'
        item.save()
def get_language(self):
    """Set 'language' from the document's "Language" label, if usable.

    Looks up the string 'Language' in ``self.doc``, takes the next sibling
    of the label's grandparent, and converts its text with
    ``lang_to_language_code``. If any step yields nothing, the parent
    class's ``get_language`` is used instead.
    """
    # NOTE(review): self.doc is presumably a BeautifulSoup document -- confirm
    label = self.doc.find(string='Language')
    code = ''
    if label:
        value_node = label.parent.parent.find_next_sibling()
        if value_node:
            text = value_node.get_text()
            if text:
                code = lang_to_language_code(text)
    if code:
        self.set('language', code)
    else:
        super(UbiquityScraper, self).get_language()
def load_from_pandata(self, metadata, work=None):
    ''' Create or update a work and an edition from a Pandata object.

    Each identifier type in IDTABLE is looked up in the metadata. An
    identifier that already exists in the database decides which work
    (and possibly which edition) is used; if it belongs to a different
    work than the one passed in, the two works are merged, newer into
    older. Identifiers not yet in the database are attached to the
    resulting work/edition afterwards.

    Args:
        metadata: a Pandata object; the attributes read here are url,
            edition_identifiers, identifiers, title, language,
            edition_note, publisher, publication_date, description,
            creator, subjects and covers.
        work: optional existing work to associate the metadata with.

    Returns:
        The edition that was found or created, or None when no work
        could be found and the metadata has no title to create one from.
    '''
    # find a work to associate
    edition = None
    has_ed_id = False
    if metadata.url:
        new_ids = [('http', 'http', metadata.url)]
    else:
        new_ids = []
    for (identifier, id_code) in IDTABLE:
        # note that the work chosen is the last associated
        value = metadata.edition_identifiers.get(identifier, None)
        value = identifier_cleaner(id_code)(value)
        if not value:
            value = metadata.identifiers.get(identifier, None)
        if not value:
            continue
        if id_code not in WORK_IDENTIFIERS:
            has_ed_id = True
        value = value[0] if isinstance(value, list) else value
        try:
            ident = models.Identifier.objects.get(type=id_code, value=value)
            # Compare primary keys by value: an identity test (`is not`)
            # can report equal ids as different and would then merge a
            # work with itself.
            if work and ident.work and ident.work_id != work.id:
                # dangerous! merge newer into older
                if work.id < ident.work_id:
                    work = merge_works(work, ident.work)
                else:
                    work = merge_works(ident.work, work)
            else:
                work = ident.work
            if ident.edition and not edition:
                edition = ident.edition
        except models.Identifier.DoesNotExist:
            if id_code != 'edid' or not has_ed_id:  # last in loop
                # only need to create edid if there is no edition id for the edition
                new_ids.append((identifier, id_code, value))

    if not work:
        if not metadata.title:
            # nothing to attach to and nothing to create a work from
            return None
        language = lang_to_language_code(metadata.language)
        work = models.Work.objects.create(
            title=metadata.title,
            language=language if language else 'xx',
        )
    if not edition:
        if metadata.edition_note:
            (note, _) = models.EditionNote.objects.get_or_create(note=metadata.edition_note)
        else:
            note = None
        edition = models.Edition.objects.create(
            title=metadata.title,
            work=work,
            note=note,
        )
    for (identifier, id_code, value) in new_ids:
        # work-level identifiers are not tied to a particular edition
        models.Identifier.set(
            type=id_code,
            value=value,
            edition=edition if id_code not in WORK_IDENTIFIERS else None,
            work=work,
        )
    if metadata.publisher:  # always believe yaml
        edition.set_publisher(metadata.publisher)

    if metadata.publication_date:  # always believe yaml
        edition.publication_date = metadata.publication_date

    # be careful about overwriting the work description
    if metadata.description and len(metadata.description) > len(work.description):
        # don't overwrite reasonably long descriptions
        if len(work.description) < 500:
            work.description = metadata.description

    # authors are only loaded when the edition has none yet
    if metadata.creator and not edition.authors.count():
        edition.authors.clear()
        for key in metadata.creator.keys():
            creators = metadata.creator[key]
            rel_code = inverse_marc_rels.get(key, None)
            if not rel_code:
                # retry with trailing 's' characters stripped; default to 'auth'
                rel_code = inverse_marc_rels.get(key.rstrip('s'), 'auth')
            creators = creators if isinstance(creators, list) else [creators]
            for creator in creators:
                edition.add_author(unreverse_name(creator.get('agent_name', '')), relation=rel_code)

    for yaml_subject in metadata.subjects:  # always add yaml subjects (don't clear)
        if isinstance(yaml_subject, tuple):
            (authority, heading) = yaml_subject
        # NOTE(review): `unicode` is not a Python 3 builtin; presumably it is
        # provided elsewhere in this file -- confirm before simplifying.
        elif isinstance(yaml_subject, str) or isinstance(yaml_subject, unicode):
            (authority, heading) = ('', yaml_subject)
        else:
            continue
        models.Subject.set_by_name(heading, work=work, authority=authority)

    # the default edition uses the first cover in covers.
    for cover in metadata.covers:
        if cover.get('image_path', False):
            edition.cover_image = urljoin(self.base_url, cover['image_path'])
            break
        elif cover.get('image_url', False):
            edition.cover_image = cover['image_url']
            break
    work.save()
    edition.save()
    return edition
def doab_lang_to_iso_639_1(lang):
    """Return the code ``lang_to_language_code`` gives for ``lang``, or 'xx'.

    The placeholder 'xx' is returned whenever the conversion result is
    falsy.
    """
    code = lang_to_language_code(lang)
    if not code:
        return 'xx'
    return code