def create_subjects_recursive(custom_provider, root_text, exclude_texts, parent=None):
    """Copy a BePress subject, and its non-excluded descendants, onto a provider.

    :param custom_provider: provider that will own the newly created subjects.
    :param root_text: text of the BePress subject to duplicate.
    :param exclude_texts: texts of child subjects that must not be copied.
    :param parent: already-created custom subject to attach the copy to, or
        ``None`` when the copy has no parent.
    """
    logger.info('Duplicating BePress subject {} on {}'.format(root_text, custom_provider._id))
    source_subject = Subject.objects.get(provider=BEPRESS_PROVIDER, text=root_text)
    duplicate = Subject(
        text=root_text,
        parent=parent,
        bepress_subject=source_subject,
        provider=custom_provider,
    )
    duplicate.save()

    # This is not a problem now, as all excluded subjects are leaves, but it
    # could be problematic if non-leaves had their children excluded. It could
    # also be problematic if they didn't, if any of those children are used by
    # existing preprints.
    # TODO: Determine correct resolution
    child_texts = source_subject.children.exclude(text__in=exclude_texts).values_list('text', flat=True)
    for child_text in child_texts:
        create_subjects_recursive(custom_provider, child_text, exclude_texts, parent=duplicate)
def map_custom_subject(custom_provider, name, parent, mapping):
    """Create a custom subject on a provider, mapped to a BePress subject.

    :param custom_provider: provider that will own the new subject.
    :param name: text of the subject to create.
    :param parent: text of the parent subject on the same provider, or a
        falsy value for a subject without a parent.
    :param mapping: text of the BePress subject the new subject maps to.
    :return: ``True`` once the subject is saved; ``False`` when a parent was
        requested but is not found on the provider.
    """
    parent_description = 'parent {}'.format(parent) if parent else 'no parent'
    logger.info('Attempting to create subject {} on {} from {} with {}'.format(
        name, custom_provider._id, mapping, parent_description))

    parent_subject = (
        Subject.objects.filter(provider=custom_provider, text=parent).first()
        if parent else None
    )
    # The BePress lookup deliberately happens before the missing-parent check,
    # so an unknown mapping raises even when the parent is also missing.
    bepress_subject = Subject.objects.get(provider=BEPRESS_PROVIDER, text=mapping)

    if parent and not parent_subject:
        return False

    Subject(
        provider=custom_provider,
        text=name,
        parent=parent_subject,
        bepress_subject=bepress_subject,
    ).save()
    return True
def validate_subject_hierarchy(subject_hierarchy):
    """Check that the given subject ids form one root-to-descendant chain.

    Ids are loaded in order until the first subject without a parent is
    found; from there the chain is followed one child at a time until every
    supplied id has been consumed.

    :param subject_hierarchy: iterable of subject ``_id`` values.
    :raises ValidationValueError: if a loaded id does not exist, if no root
        subject is present, or if the ids do not form a single chain.
    """
    from osf.models import Subject

    pending_ids = set(subject_hierarchy)
    confirmed_ids = []

    for candidate_id in subject_hierarchy:
        current = Subject.load(candidate_id)
        if not current:
            raise ValidationValueError('Subject with id <{}> could not be found.'.format(candidate_id))
        if current.parent:
            # Not a root; keep scanning for the top of the chain.
            continue

        pending_ids.remove(candidate_id)
        confirmed_ids.append(current._id)

        # Descend one level per pass, consuming exactly one pending child.
        while pending_ids:
            child_ids = set(current.children.values_list('_id', flat=True))
            if not child_ids & pending_ids:
                raise ValidationValueError('Invalid subject hierarchy: {}'.format(subject_hierarchy))
            for next_subject in current.children.filter(_id__in=pending_ids):
                current = next_subject
                confirmed_ids.append(next_subject._id)
                pending_ids.remove(next_subject._id)
                break

        if set(confirmed_ids) == set(subject_hierarchy):
            return
        raise ValidationValueError('Invalid subject hierarchy: {}'.format(subject_hierarchy))

    raise ValidationValueError('Unable to find root subject in {}'.format(subject_hierarchy))
def set_subjects(self, preprint_subjects, auth, log=True):
    """Replace this preprint's subjects with the supplied hierarchies.

    :param preprint_subjects: iterable of hierarchies, each an iterable of
        subject ids; empty hierarchies are skipped.
    :param auth: auth object whose ``user`` must hold 'write' permission.
    :param log: when true, record a SUBJECTS_UPDATED log entry.
    :raises PermissionsError: if the user lacks 'write' permission.
    """
    if not self.has_permission(auth.user, 'write'):
        raise PermissionsError('Must have admin or write permissions to change a preprint\'s subjects.')

    # Snapshot the current ids before the relation is wiped.
    old_subjects = list(self.subjects.values_list('id', flat=True))
    self.subjects.clear()

    for subj_list in preprint_subjects:
        hierarchy = list(subj_list)
        if not hierarchy:
            continue
        validate_subject_hierarchy(hierarchy)
        for subject_id in hierarchy:
            self.subjects.add(Subject.load(subject_id))

    if log:
        current_values = list(self.subjects.values('_id', 'text'))
        previous_values = list(Subject.objects.filter(id__in=old_subjects).values('_id', 'text'))
        self.add_log(
            action=PreprintLog.SUBJECTS_UPDATED,
            params={
                'subjects': current_values,
                'old_subjects': previous_values,
                'preprint': self._id,
            },
            auth=auth,
            save=False,
        )

    self.save(old_subjects=old_subjects)
def get_queryset(self):
    """Return the subjects of the requested preprint provider.

    Without a ``filter[parents]``/``filter[parent]`` query parameter, all of
    the provider's subjects are returned. With the value ``'null'`` the
    top-level subjects are returned. Otherwise the children of the given
    parent id are returned, either from the provider's own subjects or,
    when it has none, from the legacy ``subjects_acceptable`` rules.
    """
    query_params = self.request.query_params
    parent = query_params.get('filter[parents]', None) or query_params.get('filter[parent]', None)
    provider = get_object_or_error(
        PreprintProvider, self.kwargs['provider_id'], display_name='PreprintProvider')

    if not parent:
        return provider.all_subjects
    if parent == 'null':
        return provider.top_level_subjects
    if provider.subjects.exists():
        return provider.subjects.filter(parent___id=parent)

    # TODO: Delete this when all PreprintProviders have a mapping
    # Calculate this here to only have to do it once.
    allowed_parents = []
    for rule in provider.subjects_acceptable:
        allowed_parents.extend(rule[0])
    allows_children = [rule[0][-1] for rule in provider.subjects_acceptable if rule[1]]

    matching = []
    for sub in Subject.find(MQ('parent___id', 'eq', parent)):
        if provider.subjects_acceptable == [] or self.is_valid_subject(
                allows_children=allows_children, allowed_parents=allowed_parents, sub=sub):
            matching.append(sub)
    return matching
def to_representation(self, subject):
    """Serialize a subject, given as an instance or an id, to a small dict.

    :param subject: a ``Subject`` instance, or a value that ``Subject.load``
        accepts.
    :return: ``{'id': ..., 'text': ...}``, or ``None`` when loading yields
        ``None``.
    """
    resolved = subject if isinstance(subject, Subject) else Subject.load(subject)
    if resolved is None:
        return None
    return {'id': resolved._id, 'text': resolved.text}
def set_subjects(self, preprint_subjects, auth, log=True):
    """Replace this preprint's subjects with the supplied hierarchies.

    :param preprint_subjects: iterable of hierarchies, each an iterable of
        subject ids; empty hierarchies are skipped.
    :param auth: auth object whose ``user`` must hold 'write' permission.
    :param log: when true, record a SUBJECTS_UPDATED log entry.
    :raises PermissionsError: if the user lacks 'write' permission.
    """
    if not self.has_permission(auth.user, 'write'):
        raise PermissionsError('Must have admin or write permissions to change a preprint\'s subjects.')

    # Remember what was set before clearing, for the log and for save().
    old_subjects = list(self.subjects.values_list('id', flat=True))
    self.subjects.clear()

    for subj_list in preprint_subjects:
        subj_hierarchy = list(subj_list)
        if subj_hierarchy:
            validate_subject_hierarchy(subj_hierarchy)
            for s_id in subj_hierarchy:
                self.subjects.add(Subject.load(s_id))

    if log:
        log_params = {
            'subjects': list(self.subjects.values('_id', 'text')),
            'old_subjects': list(Subject.objects.filter(id__in=old_subjects).values('_id', 'text')),
            'preprint': self._id,
        }
        self.add_log(
            action=PreprintLog.SUBJECTS_UPDATED,
            params=log_params,
            auth=auth,
            save=False,
        )

    self.save(old_subjects=old_subjects)
def validate_subject_hierarchy(subject_hierarchy):
    """Check that the given subject ids form one root-to-descendant chain.

    Ids are loaded in order until the first subject without a parent is
    found; from there the chain is followed one child at a time until every
    supplied id has been consumed.

    :param subject_hierarchy: iterable of subject ``_id`` values.
    :raises ValidationValueError: if a loaded id does not exist, if no root
        subject is present, or if the ids do not form a single chain.
    """
    from osf.models import Subject

    unvisited = set(subject_hierarchy)
    visited = []

    for subject_id in subject_hierarchy:
        node = Subject.load(subject_id)
        if not node:
            raise ValidationValueError('Subject with id <{}> could not be found.'.format(subject_id))
        if node.parent:
            # Only a parentless subject can start the chain.
            continue

        unvisited.remove(subject_id)
        visited.append(node._id)

        # Follow the chain downward, one matching child per pass.
        while unvisited:
            if not set(node.children.values_list('_id', flat=True)) & unvisited:
                raise ValidationValueError('Invalid subject hierarchy: {}'.format(subject_hierarchy))
            for child in node.children.filter(_id__in=unvisited):
                node = child
                visited.append(child._id)
                unvisited.remove(child._id)
                break

        if set(visited) != set(subject_hierarchy):
            raise ValidationValueError('Invalid subject hierarchy: {}'.format(subject_hierarchy))
        return

    raise ValidationValueError('Unable to find root subject in {}'.format(subject_hierarchy))
def to_representation(self, subject):
    """Serialize a subject, given as an instance or an id, to a small dict.

    :param subject: a ``Subject`` instance, or a value that ``Subject.load``
        accepts.
    :return: a dict with ``'id'`` and ``'text'`` keys, or ``None`` when
        loading yields ``None``.
    """
    if isinstance(subject, Subject):
        loaded = subject
    else:
        loaded = Subject.load(subject)

    if loaded is None:
        return None
    return {
        'id': loaded._id,
        'text': loaded.text,
    }
def _create(cls, target_class, text=None, parents=None, *args, **kwargs):
    """Return the subject with the given text, creating it when missing.

    :param target_class: class instantiated with ``*args``/``**kwargs`` when
        no subject with ``text`` exists yet.
    :param text: text used both to look up and to populate the subject.
    :param parents: optional iterable of parent subjects attached to a newly
        created subject; ``None`` (the default) attaches none.
    :return: the existing or newly created subject.
    """
    try:
        subject = Subject.find_one(Q('text', 'eq', text))
    except NoResultsFound:
        subject = target_class(*args, **kwargs)
        subject.text = text
        # Saved once before touching the relation, then again afterwards.
        subject.save()
        # ``None`` replaces the former shared mutable ``[]`` default.
        subject.parents.add(*(parents or ()))
        subject.save()
    return subject
def get_subject_id(name):
    """Return the ``_id`` of the subject whose text is ``name``.

    Results are memoized in ``SUBJECTS_CACHE`` so each name is looked up at
    most once.

    :raises Exception: if no subject with that text exists.
    """
    if name in SUBJECTS_CACHE:
        return SUBJECTS_CACHE[name]

    try:
        found = Subject.find_one(Q('text', 'eq', name))
    except NoResultsFound:
        raise Exception('Subject: "{}" not found'.format(name))

    SUBJECTS_CACHE[name] = found._id
    return SUBJECTS_CACHE[name]
def setUp(self):
    """Create one subject with two children and fetch the taxonomy list."""
    super(TestTaxonomy, self).setUp()

    # One parent subject with two children attached to it.
    root = SubjectFactory()
    self.subject1 = root
    self.subject1_child1 = SubjectFactory(parent=root)
    self.subject1_child2 = SubjectFactory(parent=root)

    self.subjects = Subject.find()

    # Request the listing once; tests read the response and its data.
    self.url = '/{}taxonomies/'.format(API_BASE)
    response = self.app.get(self.url)
    self.res = response
    self.data = response.json['data']
def get_queryset(self):
    """Return the subjects of the requested preprint provider.

    Without a ``filter[parents]``/``filter[parent]`` query parameter, all of
    the provider's subjects are returned. With the value ``'null'`` the
    top-level subjects are returned. Otherwise the children of the given
    parent id are returned, either from the provider's own subjects or,
    when it has none, from the legacy ``subjects_acceptable`` rules.
    """
    params = self.request.query_params
    parent = params.get('filter[parents]', None) or params.get('filter[parent]', None)
    provider = get_object_or_error(
        PreprintProvider,
        self.kwargs['provider_id'],
        display_name='PreprintProvider',
    )

    if not parent:
        return provider.all_subjects
    if parent == 'null':
        return provider.top_level_subjects
    if provider.subjects.exists():
        return provider.subjects.filter(parent___id=parent)

    # TODO: Delete this when all PreprintProviders have a mapping
    # Calculate this here to only have to do it once.
    allowed_parents = [
        subject_id
        for entry in provider.subjects_acceptable
        for subject_id in entry[0]
    ]
    allows_children = [
        entry[0][-1]
        for entry in provider.subjects_acceptable
        if entry[1]
    ]

    def is_permitted(sub):
        # An empty rule list means every child subject is acceptable.
        return provider.subjects_acceptable == [] or self.is_valid_subject(
            allows_children=allows_children, allowed_parents=allowed_parents, sub=sub)

    return [sub for sub in Subject.find(MQ('parent___id', 'eq', parent)) if is_permitted(sub)]
def set_subjects(self, preprint_subjects, auth):
    """Replace this preprint's subjects with the supplied hierarchies.

    :param preprint_subjects: iterable of hierarchies, each an iterable of
        subject ids; empty hierarchies are skipped.
    :param auth: auth object whose ``user`` must hold ADMIN permission on
        the preprint's node.
    :raises PermissionsError: if the user is not an admin of the node.
    """
    if not self.node.has_permission(auth.user, ADMIN):
        raise PermissionsError('Only admins can change a preprint\'s subjects.')

    # Captured before clearing so save() receives the previous ids.
    old_subjects = list(self.subjects.values_list('id', flat=True))
    self.subjects.clear()

    for subj_list in preprint_subjects:
        hierarchy = list(subj_list)
        if not hierarchy:
            continue
        validate_subject_hierarchy(hierarchy)
        for subject_id in hierarchy:
            self.subjects.add(Subject.load(subject_id))

    self.save(old_subjects=old_subjects)
def set_subjects(self, preprint_subjects, auth, save=False):
    """Replace this preprint's subjects with the supplied hierarchies.

    :param preprint_subjects: iterable of hierarchies, each an iterable of
        subject ids; empty hierarchies are skipped.
    :param auth: auth object whose ``user`` must hold ADMIN permission on
        the preprint's node.
    :param save: when true, call ``self.save()`` after updating.
    :raises PermissionsError: if the user is not an admin of the node.
    """
    if not self.node.has_permission(auth.user, ADMIN):
        raise PermissionsError('Only admins can change a preprint\'s subjects.')

    self.subjects.clear()

    for subj_list in preprint_subjects:
        hierarchy = list(subj_list)
        if not hierarchy:
            continue
        validate_subject_hierarchy(hierarchy)
        for subject_id in hierarchy:
            self.subjects.add(Subject.load(subject_id))

    if save:
        self.save()
def add_subjects_to_paleorxiv():
    """Add 'Ichnology' and 'Taphonomy' subjects to the paleorxiv provider.

    Both are created under the provider's 'Earth and Life Sciences' subject
    and mapped to the 'Paleontology' subject of the 'osf' provider.
    """
    provider = PreprintProvider.objects.get(_id='paleorxiv')
    paleontology = Subject.objects.get(text='Paleontology', provider___id='osf')
    parent_subject = Subject.objects.get(text='Earth and Life Sciences', provider=provider)

    for subject_text in ('Ichnology', 'Taphonomy'):
        Subject(
            text=subject_text,
            provider=provider,
            parent=parent_subject,
            bepress_subject=paleontology,
        ).save()

    provider.save()
def test_format_preprint(self):
    """Check every graph node that ``format_preprint`` emits for a preprint.

    Nodes are keyed by position and popped as they are verified, so the
    final assertion proves nothing unexpected was emitted.
    """
    res = format_preprint(self.preprint)

    expected_types = {
        'creator', 'contributor', 'throughsubjects', 'subject', 'throughtags', 'tag',
        'workidentifier', 'agentidentifier', 'person', 'preprint', 'workrelation', 'creativework',
    }
    assert set(gn['@type'] for gn in res) == expected_types

    nodes = dict(enumerate(res))

    def pop_all(node_type):
        # Remove and return every remaining node of the given @type.
        return [nodes.pop(k) for k, v in nodes.items() if v['@type'] == node_type]

    def pop_first(matches):
        # Remove and return the first remaining node accepted by ``matches``.
        return nodes.pop(next(k for k, v in nodes.items() if matches(v)))

    def reference(node):
        # Minimal @id/@type reference to an already-popped node.
        return {'@id': node['@id'], '@type': node['@type']}

    preprint = pop_first(lambda v: v['@type'] == 'preprint')
    node = self.preprint.node
    assert preprint['title'] == self.preprint.node.title
    assert preprint['description'] == self.preprint.node.description
    assert preprint['is_deleted'] == (not self.preprint.is_published or not self.preprint.node.is_public or self.preprint.node.is_preprint_orphan)
    assert preprint['date_updated'] == self.preprint.date_modified.isoformat()
    assert preprint['date_published'] == self.preprint.date_published.isoformat()

    tags = pop_all('tag')
    through_tags = pop_all('throughtags')
    assert sorted(tag['@id'] for tag in tags) == sorted(tt['tag']['@id'] for tt in through_tags)
    assert sorted(tag['name'] for tag in tags) == ['preprint', 'spoderman']

    subjects = pop_all('subject')
    through_subjects = pop_all('throughsubjects')
    assert sorted(subject['@id'] for subject in subjects) == sorted(tt['subject']['@id'] for tt in through_subjects)
    assert sorted(subject['name'] for subject in subjects) == [Subject.load(s).text for h in self.preprint.subjects for s in h]

    def by_given_name(person):
        return person['given_name']

    people = sorted(pop_all('person'), key=by_given_name)
    expected_people = sorted([{
        '@type': 'person',
        'given_name': u'BoJack',
        'family_name': u'Horseman',
    }, {
        '@type': 'person',
        'given_name': self.user.given_name,
        'family_name': self.user.family_name,
    }, {
        '@type': 'person',
        'given_name': self.preprint.node.creator.given_name,
        'family_name': self.preprint.node.creator.family_name,
    }], key=by_given_name)
    # The generated @id values are not predictable; copy them over by position.
    for position, expected in enumerate(expected_people):
        expected['@id'] = people[position]['@id']
    assert people == expected_people

    def person_reference(given_name):
        # Reference to the first person node carrying this given name.
        return {'@id': [p['@id'] for p in people if p['given_name'] == given_name][0], '@type': 'person'}

    creators = sorted(pop_all('creator'), key=lambda x: x['order_cited'])
    assert creators == [{
        '@id': creators[0]['@id'],
        '@type': 'creator',
        'order_cited': 0,
        'cited_as': u'{}'.format(self.preprint.node.creator.fullname),
        'agent': person_reference(self.preprint.node.creator.given_name),
        'creative_work': reference(preprint),
    }, {
        '@id': creators[1]['@id'],
        '@type': 'creator',
        'order_cited': 1,
        'cited_as': u'BoJack Horseman',
        'agent': person_reference(u'BoJack'),
        'creative_work': reference(preprint),
    }]

    contributors = pop_all('contributor')
    assert contributors == [{
        '@id': contributors[0]['@id'],
        '@type': 'contributor',
        'cited_as': u'{}'.format(self.user.fullname),
        'agent': person_reference(self.user.given_name),
        'creative_work': reference(preprint),
    }]

    agentidentifiers = {identifier['uri'] for identifier in pop_all('agentidentifier')}
    assert agentidentifiers == set([
        'mailto:' + self.user.username,
        'mailto:' + self.preprint.node.creator.username,
        self.user.profile_image_url(),
        self.preprint.node.creator.profile_image_url(),
    ]) | set(urlparse.urljoin(settings.DOMAIN, user.profile_url) for user in self.preprint.node.contributors if user.is_registered)

    related_work = pop_first(lambda v: v['@type'] == 'creativework')
    assert set(related_work.keys()) == {'@id', '@type'}  # Empty except @id and @type

    doi = pop_first(lambda v: v['@type'] == 'workidentifier' and 'doi' in v['uri'])
    assert doi['creative_work'] == related_work

    workidentifiers = [identifier['uri'] for identifier in pop_all('workidentifier')]
    assert workidentifiers == [urlparse.urljoin(settings.DOMAIN, self.preprint.url)]

    # Whatever node is left first is compared against the expected workrelation.
    relation = nodes.pop(nodes.keys()[0])
    assert relation == {
        '@id': relation['@id'],
        '@type': 'workrelation',
        'related': reference(related_work),
        'subject': reference(preprint),
    }

    assert nodes == {}