Ejemplo n.º 1
0
def create_subjects_recursive(custom_provider, root_text, exclude_texts, parent=None):
    logger.info('Duplicating BePress subject {} on {}'.format(root_text, custom_provider._id))
    bepress_subj = Subject.objects.get(provider=BEPRESS_PROVIDER, text=root_text)
    custom_subj = Subject(text=root_text, parent=parent, bepress_subject=bepress_subj, provider=custom_provider)
    custom_subj.save()
    # This is not a problem now, as all excluded subjects are leafs, but it could be problematic if non-leafs had their children excluded.
    # It could also be problematic if they didn't, if any of those children are used by existing preprints.
    # TODO: Determine correct resolution
    for child_text in bepress_subj.children.exclude(text__in=exclude_texts).values_list('text', flat=True):
        create_subjects_recursive(custom_provider, child_text, exclude_texts, parent=custom_subj)
Ejemplo n.º 2
0
def create_subjects_recursive(custom_provider, root_text, exclude_texts, parent=None):
    logger.info('Duplicating BePress subject {} on {}'.format(root_text, custom_provider._id))
    bepress_subj = Subject.objects.get(provider=BEPRESS_PROVIDER, text=root_text)
    custom_subj = Subject(text=root_text, parent=parent, bepress_subject=bepress_subj, provider=custom_provider)
    custom_subj.save()
    # This is not a problem now, as all excluded subjects are leafs, but it could be problematic if non-leafs had their children excluded.
    # It could also be problematic if they didn't, if any of those children are used by existing preprints.
    # TODO: Determine correct resolution
    for child_text in bepress_subj.children.exclude(text__in=exclude_texts).values_list('text', flat=True):
        create_subjects_recursive(custom_provider, child_text, exclude_texts, parent=custom_subj)
Ejemplo n.º 3
0
def map_custom_subject(custom_provider, name, parent, mapping):
    logger.info('Attempting to create subject {} on {} from {} with {}'.format(name, custom_provider._id, mapping, 'parent {}'.format(parent) if parent else 'no parent'))
    if parent:
        parent_subject = Subject.objects.filter(provider=custom_provider, text=parent).first()
    else:
        parent_subject = None
    bepress_subject = Subject.objects.get(provider=BEPRESS_PROVIDER, text=mapping)
    if parent and not parent_subject:
        return False
    custom_subject = Subject(provider=custom_provider, text=name, parent=parent_subject, bepress_subject=bepress_subject)
    custom_subject.save()
    return True
Ejemplo n.º 4
0
def map_custom_subject(custom_provider, name, parent, mapping):
    logger.info('Attempting to create subject {} on {} from {} with {}'.format(name, custom_provider._id, mapping, 'parent {}'.format(parent) if parent else 'no parent'))
    if parent:
        parent_subject = Subject.objects.filter(provider=custom_provider, text=parent).first()
    else:
        parent_subject = None
    bepress_subject = Subject.objects.get(provider=BEPRESS_PROVIDER, text=mapping)
    if parent and not parent_subject:
        return False
    custom_subject = Subject(provider=custom_provider, text=name, parent=parent_subject, bepress_subject=bepress_subject)
    custom_subject.save()
    return True
Ejemplo n.º 5
0
def validate_subject_hierarchy(subject_hierarchy):
    from osf.models import Subject
    validated_hierarchy, raw_hierarchy = [], set(subject_hierarchy)
    for subject_id in subject_hierarchy:
        subject = Subject.load(subject_id)
        if not subject:
            raise ValidationValueError('Subject with id <{}> could not be found.'.format(subject_id))

        if subject.parent:
            continue

        raw_hierarchy.remove(subject_id)
        validated_hierarchy.append(subject._id)

        while raw_hierarchy:
            if not set(subject.children.values_list('_id', flat=True)) & raw_hierarchy:
                raise ValidationValueError('Invalid subject hierarchy: {}'.format(subject_hierarchy))
            else:
                for child in subject.children.filter(_id__in=raw_hierarchy):
                    subject = child
                    validated_hierarchy.append(child._id)
                    raw_hierarchy.remove(child._id)
                    break
        if set(validated_hierarchy) == set(subject_hierarchy):
            return
        else:
            raise ValidationValueError('Invalid subject hierarchy: {}'.format(subject_hierarchy))
    raise ValidationValueError('Unable to find root subject in {}'.format(subject_hierarchy))
Ejemplo n.º 6
0
    def set_subjects(self, preprint_subjects, auth, log=True):
        if not self.has_permission(auth.user, 'write'):
            raise PermissionsError(
                'Must have admin or write permissions to change a preprint\'s subjects.'
            )

        old_subjects = list(self.subjects.values_list('id', flat=True))
        self.subjects.clear()
        for subj_list in preprint_subjects:
            subj_hierarchy = []
            for s in subj_list:
                subj_hierarchy.append(s)
            if subj_hierarchy:
                validate_subject_hierarchy(subj_hierarchy)
                for s_id in subj_hierarchy:
                    self.subjects.add(Subject.load(s_id))

        if log:
            self.add_log(
                action=PreprintLog.SUBJECTS_UPDATED,
                params={
                    'subjects':
                    list(self.subjects.values('_id', 'text')),
                    'old_subjects':
                    list(
                        Subject.objects.filter(id__in=old_subjects).values(
                            '_id', 'text')),
                    'preprint':
                    self._id
                },
                auth=auth,
                save=False,
            )

        self.save(old_subjects=old_subjects)
Ejemplo n.º 7
0
 def get_queryset(self):
     parent = self.request.query_params.get(
         'filter[parents]', None) or self.request.query_params.get(
             'filter[parent]', None)
     provider = get_object_or_error(PreprintProvider,
                                    self.kwargs['provider_id'],
                                    display_name='PreprintProvider')
     if parent:
         if parent == 'null':
             return provider.top_level_subjects
         if provider.subjects.exists():
             return provider.subjects.filter(parent___id=parent)
         else:
             # TODO: Delet this when all PreprintProviders have a mapping
             #  Calculate this here to only have to do it once.
             allowed_parents = [
                 id_ for sublist in provider.subjects_acceptable
                 for id_ in sublist[0]
             ]
             allows_children = [
                 subs[0][-1] for subs in provider.subjects_acceptable
                 if subs[1]
             ]
             return [
                 sub
                 for sub in Subject.find(MQ('parent___id', 'eq', parent))
                 if provider.subjects_acceptable == []
                 or self.is_valid_subject(allows_children=allows_children,
                                          allowed_parents=allowed_parents,
                                          sub=sub)
             ]
     return provider.all_subjects
Ejemplo n.º 8
0
 def to_representation(self, subject):
     if not isinstance(subject, Subject):
         subject = Subject.load(subject)
     if subject is not None:
         return {'id': subject._id,
                 'text': subject.text}
     return None
Ejemplo n.º 9
0
    def set_subjects(self, preprint_subjects, auth, log=True):
        if not self.has_permission(auth.user, 'write'):
            raise PermissionsError('Must have admin or write permissions to change a preprint\'s subjects.')

        old_subjects = list(self.subjects.values_list('id', flat=True))
        self.subjects.clear()
        for subj_list in preprint_subjects:
            subj_hierarchy = []
            for s in subj_list:
                subj_hierarchy.append(s)
            if subj_hierarchy:
                validate_subject_hierarchy(subj_hierarchy)
                for s_id in subj_hierarchy:
                    self.subjects.add(Subject.load(s_id))

        if log:
            self.add_log(
                action=PreprintLog.SUBJECTS_UPDATED,
                params={
                    'subjects': list(self.subjects.values('_id', 'text')),
                    'old_subjects': list(Subject.objects.filter(id__in=old_subjects).values('_id', 'text')),
                    'preprint': self._id
                },
                auth=auth,
                save=False,
            )

        self.save(old_subjects=old_subjects)
Ejemplo n.º 10
0
def validate_subject_hierarchy(subject_hierarchy):
    from osf.models import Subject
    validated_hierarchy, raw_hierarchy = [], set(subject_hierarchy)
    for subject_id in subject_hierarchy:
        subject = Subject.load(subject_id)
        if not subject:
            raise ValidationValueError(
                'Subject with id <{}> could not be found.'.format(subject_id))

        if subject.parent:
            continue

        raw_hierarchy.remove(subject_id)
        validated_hierarchy.append(subject._id)

        while raw_hierarchy:
            if not set(subject.children.values_list(
                    '_id', flat=True)) & raw_hierarchy:
                raise ValidationValueError(
                    'Invalid subject hierarchy: {}'.format(subject_hierarchy))
            else:
                for child in subject.children.filter(_id__in=raw_hierarchy):
                    subject = child
                    validated_hierarchy.append(child._id)
                    raw_hierarchy.remove(child._id)
                    break
        if set(validated_hierarchy) == set(subject_hierarchy):
            return
        else:
            raise ValidationValueError(
                'Invalid subject hierarchy: {}'.format(subject_hierarchy))
    raise ValidationValueError(
        'Unable to find root subject in {}'.format(subject_hierarchy))
Ejemplo n.º 11
0
 def to_representation(self, subject):
     if not isinstance(subject, Subject):
         subject = Subject.load(subject)
     if subject is not None:
         return {
             'id': subject._id,
             'text': subject.text,
         }
     return None
Ejemplo n.º 12
0
 def _create(cls, target_class, text=None, parents=[], *args, **kwargs):
     try:
         subject = Subject.find_one(Q('text', 'eq', text))
     except NoResultsFound:
         subject = target_class(*args, **kwargs)
         subject.text = text
         subject.save()
         subject.parents.add(*parents)
         subject.save()
     return subject
Ejemplo n.º 13
0
 def _create(cls, target_class, text=None, parents=[], *args, **kwargs):
     try:
         subject = Subject.find_one(Q('text', 'eq', text))
     except NoResultsFound:
         subject = target_class(*args, **kwargs)
         subject.text = text
         subject.save()
         subject.parents.add(*parents)
         subject.save()
     return subject
Ejemplo n.º 14
0
def get_subject_id(name):
    if name not in SUBJECTS_CACHE:
        subject = None
        try:
            subject = Subject.find_one(Q('text', 'eq', name))
        except NoResultsFound:
            raise Exception('Subject: "{}" not found'.format(name))
        else:
            SUBJECTS_CACHE[name] = subject._id

    return SUBJECTS_CACHE[name]
def get_subject_id(name):
    if name not in SUBJECTS_CACHE:
        subject = None
        try:
            subject = Subject.find_one(Q('text', 'eq', name))
        except NoResultsFound:
            raise Exception('Subject: "{}" not found'.format(name))
        else:
            SUBJECTS_CACHE[name] = subject._id

    return SUBJECTS_CACHE[name]
Ejemplo n.º 16
0
    def setUp(self):
        super(TestTaxonomy, self).setUp()

        # Subject 1 has 3 children
        self.subject1 = SubjectFactory()
        self.subject1_child1 = SubjectFactory(parent=self.subject1)
        self.subject1_child2 = SubjectFactory(parent=self.subject1)

        self.subjects = Subject.find()
        self.url = '/{}taxonomies/'.format(API_BASE)
        self.res = self.app.get(self.url)
        self.data = self.res.json['data']
Ejemplo n.º 17
0
 def get_queryset(self):
     parent = self.request.query_params.get('filter[parents]', None) or self.request.query_params.get('filter[parent]', None)
     provider = get_object_or_error(PreprintProvider, self.kwargs['provider_id'], display_name='PreprintProvider')
     if parent:
         if parent == 'null':
             return provider.top_level_subjects
         if provider.subjects.exists():
             return provider.subjects.filter(parent___id=parent)
         else:
             # TODO: Delet this when all PreprintProviders have a mapping
             #  Calculate this here to only have to do it once.
             allowed_parents = [id_ for sublist in provider.subjects_acceptable for id_ in sublist[0]]
             allows_children = [subs[0][-1] for subs in provider.subjects_acceptable if subs[1]]
             return [sub for sub in Subject.find(MQ('parent___id', 'eq', parent)) if provider.subjects_acceptable == [] or self.is_valid_subject(allows_children=allows_children, allowed_parents=allowed_parents, sub=sub)]
     return provider.all_subjects
Ejemplo n.º 18
0
    def set_subjects(self, preprint_subjects, auth):
        if not self.node.has_permission(auth.user, ADMIN):
            raise PermissionsError('Only admins can change a preprint\'s subjects.')

        old_subjects = list(self.subjects.values_list('id', flat=True))
        self.subjects.clear()
        for subj_list in preprint_subjects:
            subj_hierarchy = []
            for s in subj_list:
                subj_hierarchy.append(s)
            if subj_hierarchy:
                validate_subject_hierarchy(subj_hierarchy)
                for s_id in subj_hierarchy:
                    self.subjects.add(Subject.load(s_id))

        self.save(old_subjects=old_subjects)
Ejemplo n.º 19
0
    def set_subjects(self, preprint_subjects, auth, save=False):
        if not self.node.has_permission(auth.user, ADMIN):
            raise PermissionsError('Only admins can change a preprint\'s subjects.')

        self.subjects.clear()
        for subj_list in preprint_subjects:
            subj_hierarchy = []
            for s in subj_list:
                subj_hierarchy.append(s)
            if subj_hierarchy:
                validate_subject_hierarchy(subj_hierarchy)
                for s_id in subj_hierarchy:
                    self.subjects.add(Subject.load(s_id))

        if save:
            self.save()
Ejemplo n.º 20
0
    def set_subjects(self, preprint_subjects, auth):
        if not self.node.has_permission(auth.user, ADMIN):
            raise PermissionsError('Only admins can change a preprint\'s subjects.')

        old_subjects = list(self.subjects.values_list('id', flat=True))
        self.subjects.clear()
        for subj_list in preprint_subjects:
            subj_hierarchy = []
            for s in subj_list:
                subj_hierarchy.append(s)
            if subj_hierarchy:
                validate_subject_hierarchy(subj_hierarchy)
                for s_id in subj_hierarchy:
                    self.subjects.add(Subject.load(s_id))

        self.save(old_subjects=old_subjects)
def add_subjects_to_paleorxiv():
    paleoarix = PreprintProvider.objects.get(_id='paleorxiv')

    bepress_subject = Subject.objects.get(text='Paleontology',
                                          provider___id='osf')
    life_sciences = Subject.objects.get(text='Earth and Life Sciences',
                                        provider=paleoarix)
    ichnology = Subject(text='Ichnology',
                        provider=paleoarix,
                        parent=life_sciences,
                        bepress_subject=bepress_subject)
    ichnology.save()

    taphonomy = Subject(text='Taphonomy',
                        provider=paleoarix,
                        parent=life_sciences,
                        bepress_subject=bepress_subject)
    taphonomy.save()

    paleoarix.save()
Ejemplo n.º 22
0
    def test_format_preprint(self):
        res = format_preprint(self.preprint)

        assert set(gn['@type'] for gn in res) == {'creator', 'contributor', 'throughsubjects', 'subject', 'throughtags', 'tag', 'workidentifier', 'agentidentifier', 'person', 'preprint', 'workrelation', 'creativework'}

        nodes = dict(enumerate(res))
        preprint = nodes.pop(next(k for k, v in nodes.items() if v['@type'] == 'preprint'))
        assert preprint['title'] == self.preprint.node.title
        assert preprint['description'] == self.preprint.node.description
        assert preprint['is_deleted'] == (not self.preprint.is_published or not self.preprint.node.is_public or self.preprint.node.is_preprint_orphan)
        assert preprint['date_updated'] == self.preprint.date_modified.isoformat()
        assert preprint['date_published'] == self.preprint.date_published.isoformat()

        tags = [nodes.pop(k) for k, v in nodes.items() if v['@type'] == 'tag']
        through_tags = [nodes.pop(k) for k, v in nodes.items() if v['@type'] == 'throughtags']
        assert sorted(tag['@id'] for tag in tags) == sorted(tt['tag']['@id'] for tt in through_tags)
        assert sorted(tag['name'] for tag in tags) == ['preprint', 'spoderman']

        subjects = [nodes.pop(k) for k, v in nodes.items() if v['@type'] == 'subject']
        through_subjects = [nodes.pop(k) for k, v in nodes.items() if v['@type'] == 'throughsubjects']
        assert sorted(subject['@id'] for subject in subjects) == sorted(tt['subject']['@id'] for tt in through_subjects)
        assert sorted(subject['name'] for subject in subjects) == [Subject.load(s).text for h in self.preprint.subjects for s in h]

        people = sorted([nodes.pop(k) for k, v in nodes.items() if v['@type'] == 'person'], key=lambda x: x['given_name'])
        expected_people = sorted([{
            '@type': 'person',
            'given_name': u'BoJack',
            'family_name': u'Horseman',
        }, {
            '@type': 'person',
            'given_name': self.user.given_name,
            'family_name': self.user.family_name,
        }, {
            '@type': 'person',
            'given_name': self.preprint.node.creator.given_name,
            'family_name': self.preprint.node.creator.family_name,
        }], key=lambda x: x['given_name'])
        for i, p in enumerate(expected_people):
            expected_people[i]['@id'] = people[i]['@id']

        assert people == expected_people

        creators = sorted([nodes.pop(k) for k, v in nodes.items() if v['@type'] == 'creator'], key=lambda x: x['order_cited'])
        assert creators == [{
            '@id': creators[0]['@id'],
            '@type': 'creator',
            'order_cited': 0,
            'cited_as': u'{}'.format(self.preprint.node.creator.fullname),
            'agent': {'@id': [p['@id'] for p in people if p['given_name'] == self.preprint.node.creator.given_name][0], '@type': 'person'},
            'creative_work': {'@id': preprint['@id'], '@type': preprint['@type']},
        }, {
            '@id': creators[1]['@id'],
            '@type': 'creator',
            'order_cited': 1,
            'cited_as': u'BoJack Horseman',
            'agent': {'@id': [p['@id'] for p in people if p['given_name'] == u'BoJack'][0], '@type': 'person'},
            'creative_work': {'@id': preprint['@id'], '@type': preprint['@type']},
        }]

        contributors = [nodes.pop(k) for k, v in nodes.items() if v['@type'] == 'contributor']
        assert contributors == [{
            '@id': contributors[0]['@id'],
            '@type': 'contributor',
            'cited_as': u'{}'.format(self.user.fullname),
            'agent': {'@id': [p['@id'] for p in people if p['given_name'] == self.user.given_name][0], '@type': 'person'},
            'creative_work': {'@id': preprint['@id'], '@type': preprint['@type']},
        }]

        agentidentifiers = {nodes.pop(k)['uri'] for k, v in nodes.items() if v['@type'] == 'agentidentifier'}
        assert agentidentifiers == set([
            'mailto:' + self.user.username,
            'mailto:' + self.preprint.node.creator.username,
            self.user.profile_image_url(),
            self.preprint.node.creator.profile_image_url(),
        ]) | set(urlparse.urljoin(settings.DOMAIN, user.profile_url) for user in self.preprint.node.contributors if user.is_registered)

        related_work = next(nodes.pop(k) for k, v in nodes.items() if v['@type'] == 'creativework')
        assert set(related_work.keys()) == {'@id', '@type'}  # Empty except @id and @type

        doi = next(nodes.pop(k) for k, v in nodes.items() if v['@type'] == 'workidentifier' and 'doi' in v['uri'])
        assert doi['creative_work'] == related_work

        workidentifiers = [nodes.pop(k)['uri'] for k, v in nodes.items() if v['@type'] == 'workidentifier']
        assert workidentifiers == [urlparse.urljoin(settings.DOMAIN, self.preprint.url)]

        relation = nodes.pop(nodes.keys()[0])
        assert relation == {'@id': relation['@id'], '@type': 'workrelation', 'related': {'@id': related_work['@id'], '@type': related_work['@type']}, 'subject': {'@id': preprint['@id'], '@type': preprint['@type']}}

        assert nodes == {}