def set_by_name(cls, subject, work=None, authority=None):
    """Get or create the subject named ``subject`` and attach ``work`` to it.

    Use this method whenever you would be creating a new subject!

    The raw name is normalised before lookup:

    * a ';'-delimited list is treated as several subjects; every entry after
      the first is handled by a recursive call with the same ``work`` and
      ``authority``;
    * ``!<authority>:<name>`` sets both the authority and the name;
    * ``nyt:<x>=...`` becomes ``NYT Bestseller - <X>`` with authority ``nyt``;
    * ``award:<x>=...`` becomes ``Award Winner - <X>`` with authority
      ``award``.

    Args:
        subject: raw subject string.
        work: work to add to the subject's ``works``; nothing is added when
            it is None.
        authority: authority recorded on the subject if it has none yet.

    Returns:
        The subject object, or None when the normalised name does not pass
        ``valid_subject``.
    """
    subject = subject.strip()

    # make sure it's not a ; delineated list
    subjects = subject.split(';')
    for additional_subject in subjects[1:]:
        cls.set_by_name(additional_subject, work, authority)
    # Strip again: splitting "a ; b" leaves trailing whitespace on the first
    # entry, which would otherwise be stored as a distinct name.
    subject = subjects[0].strip()

    # make sure there's no heading
    headingmatch = re.match(r'^!(.+):(.+)', subject)
    if headingmatch:
        subject = headingmatch.group(2).strip()
        authority = headingmatch.group(1).strip()
    elif subject.startswith('nyt:'):
        subject = subject[4:].split('=')[0].replace('_', ' ').strip().capitalize()
        subject = 'NYT Bestseller - {}'.format(subject)
        authority = 'nyt'
    elif subject.startswith('award:'):
        subject = subject[6:].split('=')[0].replace('_', ' ').strip().capitalize()
        subject = 'Award Winner - {}'.format(subject)
        authority = 'award'

    if not valid_subject(subject):
        return None

    (subject_obj, created) = cls.objects.get_or_create(name=subject)
    # Only fill in a missing authority; never overwrite an existing one.
    if not subject_obj.authority and authority:
        subject_obj.authority = authority
        subject_obj.save()

    # ``work`` defaults to None; don't hand None to the relation manager.
    if work is not None:
        subject_obj.works.add(work)
    return subject_obj
def get_keywords(self):
    """Store the entries of the 'keywords' meta value as 'subjects'.

    The value returned by ``check_metas(['keywords'])`` has leading and
    trailing ',' / ';' characters removed and is then split on ',' or ';'
    (with any surrounding spaces). Entries that pass ``valid_subject`` are
    stored via ``self.set('subjects', ...)``. Nothing is stored when the
    stripped value is empty.
    """
    raw_keywords = self.check_metas(['keywords']).strip(',;')
    if not raw_keywords:
        return

    accepted = [
        candidate
        for candidate in re.split(' *[;,] *', raw_keywords)
        if valid_subject(candidate)
    ]
    self.set('subjects', accepted)
def attach_more_doab_metadata(edition, description, subjects, publication_date,
                              publisher_name=None, language=None, authors=u''):
    """
    for given edition, attach description, subjects, publication date to
    corresponding Edition and Work

    Existing values win: the publication date, publisher, description and
    age level are only filled in when the edition/work does not already have
    them. ``language``, when truthy, always overwrites the work's language.

    Args:
        edition: edition to update; its ``work`` is updated as well.
        description: description used when the work has none.
        subjects: iterable of subject names; each one that passes
            ``valid_subject`` is attached to the work.
        publication_date: date used when the edition has none.
        publisher_name: passed to ``edition.set_publisher`` when the edition
            has no publisher name.
        language: language to set on the work, if truthy.
        authors: raw authors value, parsed with ``creator_list``; ignored
            when falsy or NaN.

    Returns:
        The same ``edition`` object.
    """
    # if edition doesn't have a publication date, update it
    if not edition.publication_date:
        edition.publication_date = publication_date

    # if edition.publisher_name is empty, set it
    if not edition.publisher_name:
        edition.set_publisher(publisher_name)

    edition.save()

    # attach description to work if it's not empty
    work = edition.work
    if not work.description:
        work.description = description

    # update subjects
    for s in subjects:
        if valid_subject(s):
            models.Subject.set_by_name(s, work=work)

    # set reading level of work if it's empty; doab is for adults.
    if not work.age_level:
        work.age_level = '18-'

    if language:
        work.language = language
    work.save()

    if authors and authors == authors:  # test for authors != NaN
        authlist = creator_list(authors)
        # Test for None *before* calling len(): the previous order raised
        # TypeError on a None list, so the None guard never took effect.
        if authlist is not None and edition.authors.all().count() < len(authlist):
            # The parsed list has more authors than are stored: rebuild.
            edition.authors.clear()
            for rel, auth in authlist:
                edition.add_author(auth, rel)

    return edition
def handle(self, **options):
    """Clean up stored subjects whose names are not in normalised form.

    Three passes re-file works through ``Subject.set_by_name`` and then
    delete the old subject: names containing ';', names starting with
    'nyt:' and names starting with 'award:'. A final pass deletes subjects
    whose name contains '.' and does not pass ``valid_subject``.
    """
    # Each queryset is built only when its pass begins, so subjects deleted
    # by an earlier pass are not seen again.
    renormalize_lookups = (
        {"name__contains": ";"},
        {"name__startswith": "nyt:"},
        {"name__startswith": "award:"},
    )
    for lookup in renormalize_lookups:
        for stale in Subject.objects.filter(**lookup):
            for work in stale.works.all():
                Subject.set_by_name(stale.name, work=work)
            stale.delete()

    for dotted in Subject.objects.filter(name__contains="."):
        if valid_subject(dotted.name):
            continue
        dotted.delete()