Example #1
0
def format_preprint(preprint):
    """Build the graph describing ``preprint`` and return its serialized nodes.

    A ``preprint`` GraphNode is created for the preprint itself, together with
    nodes for its identifiers, tags, subjects, contributors and affiliated
    institutions. Every node reachable from those through ``get_related()`` is
    then collected once and serialized.

    :param preprint: the preprint to describe. Title, description, tags,
        contributors and institutions are read from ``preprint.node``.
    :return: list with one ``serialize()`` result per distinct node. The nodes
        are gathered in a set, so the order of the list is not meaningful.
    """
    preprint_graph = GraphNode('preprint', **{
        'title': preprint.node.title,
        'description': preprint.node.description or '',
        # Unpublished, private, orphaned, 'qatest'-tagged and deleted preprints
        # are all reported as deleted.
        'is_deleted': (
            not preprint.is_published or
            not preprint.node.is_public or
            preprint.node.is_preprint_orphan or
            preprint.node.tags.filter(name='qatest').exists() or
            preprint.node.is_deleted
        ),
        'date_updated': preprint.date_modified.isoformat(),
        'date_published': preprint.date_published.isoformat() if preprint.date_published else None
    })

    to_visit = [
        preprint_graph,
        GraphNode('workidentifier', creative_work=preprint_graph, uri=urlparse.urljoin(settings.DOMAIN, preprint._id + '/'))
    ]

    # Look the DOI identifier up once and reuse it, instead of calling
    # get_identifier() a second time just to read its value.
    doi_identifier = preprint.get_identifier('doi')
    if doi_identifier:
        to_visit.append(GraphNode('workidentifier', creative_work=preprint_graph, uri='http://dx.doi.org/{}'.format(doi_identifier.value)))

    if preprint.provider.domain_redirect_enabled:
        to_visit.append(GraphNode('workidentifier', creative_work=preprint_graph, uri=preprint.absolute_url))

    if preprint.article_doi:
        # Article DOI refers to a clone of this preprint on another system and therefore does not qualify as an identifier for this preprint
        related_work = GraphNode('creativework')
        to_visit.append(GraphNode('workrelation', subject=preprint_graph, related=related_work))
        to_visit.append(GraphNode('workidentifier', creative_work=related_work, uri='http://dx.doi.org/{}'.format(preprint.article_doi)))

    # Tags and subjects hang off the preprint node's attrs; they are picked up
    # by the get_related() walk below rather than being queued directly.
    preprint_graph.attrs['tags'] = [
        GraphNode('throughtags', creative_work=preprint_graph, tag=GraphNode('tag', name=tag))
        for tag in preprint.node.tags.values_list('name', flat=True) if tag
    ]

    preprint_graph.attrs['subjects'] = [
        GraphNode('throughsubjects', creative_work=preprint_graph, subject=GraphNode('subject', name=subject))
        for subject in set(s.bepress_text for s in preprint.subjects.all())
    ]

    to_visit.extend(format_contributor(preprint_graph, user, preprint.node.get_visible(user), i) for i, user in enumerate(preprint.node.contributors))
    to_visit.extend(GraphNode('AgentWorkRelation', creative_work=preprint_graph, agent=GraphNode('institution', name=institution))
                    for institution in preprint.node.affiliated_institutions.values_list('name', flat=True))

    visited = set()
    to_visit.extend(preprint_graph.get_related())

    # Drain the work list in FIFO order; ``visited`` makes sure a node that is
    # reachable through several paths is only serialized once.
    while to_visit:
        n = to_visit.pop(0)
        if n in visited:
            continue
        visited.add(n)
        to_visit.extend(n.get_related())

    return [node.serialize() for node in visited]
Example #2
0
def format_registration(node):
    """Build the graph describing the registration ``node`` and return its serialized nodes.

    A ``registration`` GraphNode is created for the registration itself,
    together with a work identifier built from ``node.url``, tag links,
    contributor nodes (via ``format_contributor``) and institution relations.
    Every node reachable from those through ``get_related()`` is then
    collected once and serialized.

    :param node: the registration to describe.
    :return: list with one ``serialize()`` result per distinct node. The nodes
        are gathered in a set, so the order of the list is not meaningful.
    """
    # A registration carrying a do-not-index tag, or whose title contains a
    # do-not-index substring, is reported as deleted below.
    is_qa_node = bool(set(settings.DO_NOT_INDEX_LIST['tags']).intersection(node.tags.all().values_list('name', flat=True))) \
        or any(substring in node.title for substring in settings.DO_NOT_INDEX_LIST['titles'])

    # Fetch the first schema once and test the fetched object itself: first()
    # yields None when the relation is empty, and a truthiness check on the
    # relation alone does not protect the ``.name`` access in that case.
    registered_schema = node.registered_schema
    schema = registered_schema.first() if registered_schema else None

    registration_graph = GraphNode(
        'registration', **{
            'title':
            node.title,
            'description':
            node.description or '',
            'is_deleted':
            not node.is_public or node.is_deleted or is_qa_node,
            'date_published':
            node.registered_date.isoformat() if node.registered_date else None,
            'registration_type':
            schema.name if schema is not None else None,
            'withdrawn':
            node.is_retracted,
            'justification':
            node.retraction.justification if node.retraction else None,
        })

    to_visit = [
        registration_graph,
        GraphNode('workidentifier',
                  creative_work=registration_graph,
                  uri=urlparse.urljoin(settings.DOMAIN, node.url))
    ]

    # Tags hang off the registration node's attrs; they are picked up by the
    # get_related() walk below rather than being queued directly.
    registration_graph.attrs['tags'] = [
        GraphNode('throughtags',
                  creative_work=registration_graph,
                  tag=GraphNode('tag', name=tag._id))
        for tag in node.tags.all() or [] if tag._id
    ]

    to_visit.extend(
        format_contributor(registration_graph, user,
                           bool(user._id in node.visible_contributor_ids), i)
        for i, user in enumerate(node.contributors))
    to_visit.extend(
        GraphNode('AgentWorkRelation',
                  creative_work=registration_graph,
                  agent=GraphNode('institution', name=institution.name))
        for institution in node.affiliated_institutions.all())

    visited = set()
    to_visit.extend(registration_graph.get_related())

    # Drain the work list in FIFO order; ``visited`` makes sure a node that is
    # reachable through several paths is only serialized once.
    while to_visit:
        n = to_visit.pop(0)
        if n in visited:
            continue
        visited.add(n)
        to_visit.extend(n.get_related())

    return [node_.serialize() for node_ in visited]
Example #3
0
def format_registration(node):
    """Return the serialized graph nodes that describe the registration ``node``.

    The result covers the registration itself, a work identifier built from
    ``node.url``, one tag link per tag with a non-empty ``_id``, one entry per
    contributor (produced by ``format_contributor``), one relation per
    affiliated institution, and every node reachable from those through
    ``get_related()``. Each node is serialized once; because the nodes are
    gathered in a set, the order of the returned list is not meaningful.
    """
    # TODO: Add parent and root info?
    attrs = {
        'title': node.title,
        'description': node.description or '',
        # Private, 'qatest'-tagged and deleted registrations are all reported as deleted.
        'is_deleted': not node.is_public or 'qatest' in (node.tags or []) or node.is_deleted,
        'date_published': node.registered_date.isoformat() if node.registered_date else None,
        'registration_type': node.registered_schema[0].name if node.registered_schema else None,
        'withdrawn': node.is_retracted,
        # TODO: Should this recurse up to the node's root or nah?
        'justification': node.retraction.justification if node.retraction else None,
    }
    work = GraphNode('registration', **attrs)

    identifier = GraphNode('workidentifier', creative_work=work, uri=urlparse.urljoin(settings.DOMAIN, node.url))
    pending = [work, identifier]

    # Tag links live on the registration's attrs and are reached through get_related().
    tag_links = []
    for tag in node.tags or []:
        if tag._id:
            tag_node = GraphNode('tag', name=tag._id)
            tag_links.append(GraphNode('throughtags', creative_work=work, tag=tag_node))
    work.attrs['tags'] = tag_links

    for index, user in enumerate(node.contributors):
        is_visible = bool(user._id in node.visible_contributor_ids)
        pending.append(format_contributor(work, user, is_visible, index))

    for institution in node.affiliated_institutions:
        agent = GraphNode('institution', name=institution.name)
        pending.append(GraphNode('AgentWorkRelation', creative_work=work, agent=agent))

    seen = set()
    pending.extend(work.get_related())

    # First-in, first-out walk over everything reachable from the queued nodes.
    while pending:
        current = pending.pop(0)
        if current not in seen:
            seen.add(current)
            pending.extend(current.get_related())

    return [graph_node.serialize() for graph_node in seen]
Example #4
0
def format_registration(node):
    """Build the graph describing the registration ``node`` and return its serialized nodes.

    A ``registration`` GraphNode is created for the registration itself,
    together with a work identifier built from ``node.url``, tag links,
    contributor nodes (via ``format_contributor``) and institution relations.
    When the registration has a parent, a bare ``registration`` node
    identified by the parent's URL is added and linked with an ``ispartof``
    relation. Every node reachable from those through ``get_related()`` is
    then collected once and serialized.

    :param node: the registration to describe.
    :return: list with one ``serialize()`` result per distinct node. The nodes
        are gathered in a set, so the order of the list is not meaningful.
    """
    # A registration carrying a do-not-index tag, or whose title contains a
    # do-not-index substring, is reported as deleted below.
    is_qa_node = bool(set(settings.DO_NOT_INDEX_LIST['tags']).intersection(node.tags.all().values_list('name', flat=True))) \
        or any(substring in node.title for substring in settings.DO_NOT_INDEX_LIST['titles'])

    # Fetch the first schema once and test the fetched object itself: first()
    # yields None when the relation is empty, and a truthiness check on the
    # relation alone does not protect the ``.name`` access in that case.
    registered_schema = node.registered_schema
    schema = registered_schema.first() if registered_schema else None

    registration_graph = GraphNode('registration', **{
        'title': node.title,
        'description': node.description or '',
        'is_deleted': not node.is_public or node.is_deleted or is_qa_node,
        'date_published': node.registered_date.isoformat() if node.registered_date else None,
        'registration_type': schema.name if schema is not None else None,
        'withdrawn': node.is_retracted,
        'justification': node.retraction.justification if node.retraction else None,
    })

    to_visit = [
        registration_graph,
        GraphNode('workidentifier', creative_work=registration_graph, uri=urlparse.urljoin(settings.DOMAIN, node.url))
    ]

    # Tags hang off the registration node's attrs; they are picked up by the
    # get_related() walk below rather than being queued directly.
    registration_graph.attrs['tags'] = [
        GraphNode('throughtags', creative_work=registration_graph, tag=GraphNode('tag', name=tag._id))
        for tag in node.tags.all() or [] if tag._id
    ]

    to_visit.extend(format_contributor(registration_graph, user, bool(user._id in node.visible_contributor_ids), i) for i, user in enumerate(node.contributors))
    to_visit.extend(GraphNode('AgentWorkRelation', creative_work=registration_graph, agent=GraphNode('institution', name=institution.name)) for institution in node.affiliated_institutions.all())

    # Read the parent once and reuse it for both the check and the URL.
    parent_node = node.parent_node
    if parent_node:
        # The parent is only referenced by its identifier; none of its own
        # attributes are sent along.
        parent = GraphNode('registration')
        to_visit.extend([
            parent,
            GraphNode('workidentifier', creative_work=parent, uri=urlparse.urljoin(settings.DOMAIN, parent_node.url)),
            GraphNode('ispartof', subject=registration_graph, related=parent),
        ])

    visited = set()
    to_visit.extend(registration_graph.get_related())

    # Drain the work list in FIFO order; ``visited`` makes sure a node that is
    # reachable through several paths is only serialized once.
    while to_visit:
        n = to_visit.pop(0)
        if n in visited:
            continue
        visited.add(n)
        to_visit.extend(n.get_related())

    return [node_.serialize() for node_ in visited]
Example #5
0
def format_preprint(preprint, share_type, old_subjects=None):
    """Build the graph describing ``preprint`` and return its serialized nodes.

    A GraphNode of type ``share_type`` is created for the preprint itself,
    together with nodes for its identifiers, tags, subjects, contributors and
    affiliated institutions. Every node reachable from those through
    ``get_related()`` is then collected once and serialized.

    :param preprint: the preprint to describe. Title, description, tags,
        contributors and institutions are read from ``preprint.node``.
    :param share_type: type name given to the GraphNode created for the preprint.
    :param old_subjects: optional iterable of ``Subject`` ids. Each one that is
        no longer among ``preprint.subjects`` is emitted as a
        ``throughsubjects`` node with ``is_deleted=True``.
    :return: list with one ``serialize()`` result per distinct node. The nodes
        are gathered in a set, so the order of the list is not meaningful.
    """
    if old_subjects is None:
        old_subjects = []
    from osf.models import Subject
    old_subjects = [Subject.objects.get(id=s) for s in old_subjects]
    preprint_graph = GraphNode(
        share_type,
        **{
            'title':
            preprint.node.title,
            'description':
            preprint.node.description or '',
            'is_deleted':
            (not preprint.verified_publishable
             or preprint.node.tags.filter(name='qatest').exists()),
            # Note: Changing any preprint attribute that is pulled from the node, like title, will NOT bump
            # the preprint's date modified but will bump the node's date_modified.
            # We have to send the latest date to SHARE to actually get the result to be updated.
            # If we send a date_updated that is <= the one we previously sent, SHARE will ignore any changes
            # because it looks like a race condition that arose from preprints being resent to SHARE on
            # every step of preprint creation.
            'date_updated':
            max(preprint.modified, preprint.node.modified).isoformat(),
            'date_published':
            preprint.date_published.isoformat()
            if preprint.date_published else None
        })

    to_visit = [
        preprint_graph,
        GraphNode('workidentifier',
                  creative_work=preprint_graph,
                  uri=urlparse.urljoin(settings.DOMAIN, preprint._id + '/'))
    ]

    # Look the DOI identifier up once and reuse it, instead of calling
    # get_identifier() a second time just to read its value.
    doi_identifier = preprint.get_identifier('doi')
    if doi_identifier:
        to_visit.append(
            GraphNode('workidentifier',
                      creative_work=preprint_graph,
                      uri='http://dx.doi.org/{}'.format(doi_identifier.value)))

    if preprint.provider.domain_redirect_enabled:
        to_visit.append(
            GraphNode('workidentifier',
                      creative_work=preprint_graph,
                      uri=preprint.absolute_url))

    if preprint.article_doi:
        # Article DOI refers to a clone of this preprint on another system and therefore does not qualify as an identifier for this preprint
        related_work = GraphNode('creativework')
        to_visit.append(
            GraphNode('workrelation',
                      subject=preprint_graph,
                      related=related_work))
        to_visit.append(
            GraphNode('workidentifier',
                      creative_work=related_work,
                      uri='http://dx.doi.org/{}'.format(preprint.article_doi)))

    # Tags and subjects hang off the preprint node's attrs; they are picked up
    # by the get_related() walk below rather than being queued directly.
    preprint_graph.attrs['tags'] = [
        GraphNode('throughtags',
                  creative_work=preprint_graph,
                  tag=GraphNode('tag', name=tag))
        for tag in preprint.node.tags.values_list('name', flat=True) if tag
    ]

    current_subjects = [
        GraphNode('throughsubjects',
                  creative_work=preprint_graph,
                  is_deleted=False,
                  subject=format_subject(s)) for s in preprint.subjects.all()
    ]
    # Previously sent subjects that the preprint no longer has are sent again,
    # flagged as deleted.
    deleted_subjects = [
        GraphNode('throughsubjects',
                  creative_work=preprint_graph,
                  is_deleted=True,
                  subject=format_subject(s)) for s in old_subjects
        if not preprint.subjects.filter(id=s.id).exists()
    ]
    preprint_graph.attrs['subjects'] = current_subjects + deleted_subjects

    to_visit.extend(
        format_contributor(preprint_graph, user, preprint.node.get_visible(
            user), i) for i, user in enumerate(preprint.node.contributors))
    to_visit.extend(
        GraphNode('AgentWorkRelation',
                  creative_work=preprint_graph,
                  agent=GraphNode('institution', name=institution))
        for institution in preprint.node.affiliated_institutions.values_list(
            'name', flat=True))

    visited = set()
    to_visit.extend(preprint_graph.get_related())

    # Drain the work list in FIFO order; ``visited`` makes sure a node that is
    # reachable through several paths is only serialized once.
    while to_visit:
        n = to_visit.pop(0)
        if n in visited:
            continue
        visited.add(n)
        to_visit.extend(n.get_related())

    return [node.serialize() for node in visited]
Example #6
0
def format_preprint(preprint, share_type, old_subjects=None):
    """Build the graph describing ``preprint`` and return its serialized nodes.

    A GraphNode of type ``share_type`` is created for the preprint itself,
    together with nodes for its identifiers, tags, subjects and contributors.
    Every node reachable from those through ``get_related()`` is then
    collected once and serialized.

    :param preprint: the preprint to describe; title, description, tags and
        contributors are read directly from it.
    :param share_type: type name given to the GraphNode created for the preprint.
    :param old_subjects: optional iterable of ``Subject`` ids. Each one that is
        no longer among ``preprint.subjects`` is emitted as a
        ``throughsubjects`` node with ``is_deleted=True``.
    :return: list with one ``serialize()`` result per distinct node. The nodes
        are gathered in a set, so the order of the list is not meaningful.
    """
    if old_subjects is None:
        old_subjects = []
    from osf.models import Subject
    old_subjects = [Subject.objects.get(id=s) for s in old_subjects]
    preprint_graph = GraphNode(
        share_type, **{
            'title':
            preprint.title,
            'description':
            preprint.description or '',
            # A preprint that is not publishable is only reported as deleted
            # when it is not retracted; 'qatest'-tagged preprints always are.
            'is_deleted':
            ((not preprint.verified_publishable and not preprint.is_retracted)
             or preprint.tags.filter(name='qatest').exists()),
            'date_updated':
            preprint.modified.isoformat(),
            'date_published':
            preprint.date_published.isoformat()
            if preprint.date_published else None
        })
    to_visit = [
        preprint_graph,
        GraphNode('workidentifier',
                  creative_work=preprint_graph,
                  uri=urlparse.urljoin(settings.DOMAIN, preprint._id + '/'))
    ]

    # Look the DOI identifier up once and reuse it, instead of calling
    # get_identifier() a second time just to read its value.
    doi_identifier = preprint.get_identifier('doi')
    if doi_identifier:
        to_visit.append(
            GraphNode('workidentifier',
                      creative_work=preprint_graph,
                      uri='https://doi.org/{}'.format(doi_identifier.value)))

    if preprint.provider.domain_redirect_enabled:
        to_visit.append(
            GraphNode('workidentifier',
                      creative_work=preprint_graph,
                      uri=preprint.absolute_url))

    if preprint.article_doi:
        # Article DOI refers to a clone of this preprint on another system and therefore does not qualify as an identifier for this preprint
        related_work = GraphNode('creativework')
        to_visit.append(
            GraphNode('workrelation',
                      subject=preprint_graph,
                      related=related_work))
        to_visit.append(
            GraphNode('workidentifier',
                      creative_work=related_work,
                      uri='https://doi.org/{}'.format(preprint.article_doi)))

    # Tags and subjects hang off the preprint node's attrs; they are picked up
    # by the get_related() walk below rather than being queued directly.
    preprint_graph.attrs['tags'] = [
        GraphNode('throughtags',
                  creative_work=preprint_graph,
                  tag=GraphNode('tag', name=tag))
        for tag in preprint.tags.values_list('name', flat=True) if tag
    ]

    current_subjects = [
        GraphNode('throughsubjects',
                  creative_work=preprint_graph,
                  is_deleted=False,
                  subject=format_subject(s)) for s in preprint.subjects.all()
    ]
    # Previously sent subjects that the preprint no longer has are sent again,
    # flagged as deleted.
    deleted_subjects = [
        GraphNode('throughsubjects',
                  creative_work=preprint_graph,
                  is_deleted=True,
                  subject=format_subject(s)) for s in old_subjects
        if not preprint.subjects.filter(id=s.id).exists()
    ]
    preprint_graph.attrs['subjects'] = current_subjects + deleted_subjects

    to_visit.extend(
        format_contributor(preprint_graph, user, preprint.get_visible(user), i)
        for i, user in enumerate(preprint.contributors))

    visited = set()
    to_visit.extend(preprint_graph.get_related())

    # Drain the work list in FIFO order; ``visited`` makes sure a node that is
    # reachable through several paths is only serialized once.
    while to_visit:
        n = to_visit.pop(0)
        if n in visited:
            continue
        visited.add(n)
        to_visit.extend(n.get_related())

    return [node.serialize() for node in visited]
Example #7
0
def format_preprint(preprint):
    """Return the serialized graph nodes that describe ``preprint``.

    The result covers the preprint itself, a work identifier built from
    ``preprint.url``, an extra identifier for ``preprint.article_doi`` when
    one is set, tag and subject links, one entry per contributor (produced by
    ``format_contributor``), one relation per affiliated institution, and
    every node reachable from those through ``get_related()``. Each node is
    serialized once; because the nodes are gathered in a set, the order of the
    returned list is not meaningful.
    """
    attrs = {
        'title': preprint.node.title,
        'description': preprint.node.description or '',
        # Unpublished, private, orphaned, 'qatest'-tagged and deleted preprints
        # are all reported as deleted.
        'is_deleted': (
            not preprint.is_published or
            not preprint.node.is_public or
            preprint.node.is_preprint_orphan or
            'qatest' in (preprint.node.tags or []) or
            preprint.node.is_deleted
        ),
        'date_updated': preprint.date_modified.isoformat(),
        'date_published': preprint.date_published.isoformat() if preprint.date_published else None,
    }
    work = GraphNode('preprint', **attrs)

    identifier = GraphNode('workidentifier', creative_work=work, uri=urlparse.urljoin(settings.DOMAIN, preprint.url))
    pending = [work, identifier]

    if preprint.article_doi:
        doi_uri = 'http://dx.doi.org/{}'.format(preprint.article_doi)
        pending.append(GraphNode('workidentifier', creative_work=work, uri=doi_uri))

    # Tag and subject links live on the preprint's attrs and are reached
    # through get_related().
    tag_links = []
    for tag in preprint.node.tags or []:
        if tag._id:
            tag_node = GraphNode('tag', name=tag._id)
            tag_links.append(GraphNode('throughtags', creative_work=work, tag=tag_node))
    work.attrs['tags'] = tag_links

    # Collapse the subject hierarchies into the distinct 'text' values.
    subject_names = set(
        entry['text']
        for hierarchy in preprint.get_subjects() or []
        for entry in hierarchy
    )
    subject_links = []
    for subject in subject_names:
        if subject:
            subject_node = GraphNode('subject', name=subject)
            subject_links.append(GraphNode('throughsubjects', creative_work=work, subject=subject_node))
    work.attrs['subjects'] = subject_links

    for index, user in enumerate(preprint.node.contributors):
        is_visible = bool(user._id in preprint.node.visible_contributor_ids)
        pending.append(format_contributor(work, user, is_visible, index))

    for institution in preprint.node.affiliated_institutions:
        agent = GraphNode('institution', name=institution.name)
        pending.append(GraphNode('AgentWorkRelation', creative_work=work, agent=agent))

    seen = set()
    pending.extend(work.get_related())

    # First-in, first-out walk over everything reachable from the queued nodes.
    while pending:
        current = pending.pop(0)
        if current not in seen:
            seen.add(current)
            pending.extend(current.get_related())

    return [graph_node.serialize() for graph_node in seen]
Example #8
0
def format_preprint(preprint):
    """Return the serialized graph nodes that describe ``preprint``.

    The result covers the preprint itself, a work identifier built from
    ``preprint._id``, an identifier for ``preprint.absolute_url`` when the
    provider has domain redirects enabled, a related work carrying the article
    DOI when one is set, tag and subject links, one entry per contributor
    (produced by ``format_contributor``), one relation per affiliated
    institution, and every node reachable from those through
    ``get_related()``. Each node is serialized once; because the nodes are
    gathered in a set, the order of the returned list is not meaningful.
    """
    attrs = {
        'title': preprint.node.title,
        'description': preprint.node.description or '',
        # Unpublished, private, orphaned, 'qatest'-tagged and deleted preprints
        # are all reported as deleted.
        'is_deleted': (
            not preprint.is_published or
            not preprint.node.is_public or
            preprint.node.is_preprint_orphan or
            preprint.node.tags.filter(name='qatest').exists() or
            preprint.node.is_deleted
        ),
        'date_updated': preprint.date_modified.isoformat(),
        'date_published': preprint.date_published.isoformat() if preprint.date_published else None,
    }
    work = GraphNode('preprint', **attrs)

    identifier = GraphNode('workidentifier', creative_work=work, uri=urlparse.urljoin(settings.DOMAIN, preprint._id + '/'))
    pending = [work, identifier]

    if preprint.provider.domain_redirect_enabled:
        pending.append(GraphNode('workidentifier', creative_work=work, uri=preprint.absolute_url))

    if preprint.article_doi:
        # Article DOI refers to a clone of this preprint on another system and therefore does not qualify as an identifier for this preprint
        clone = GraphNode('creativework')
        pending.append(GraphNode('workrelation', subject=work, related=clone))
        article_uri = 'http://dx.doi.org/{}'.format(preprint.article_doi)
        pending.append(GraphNode('workidentifier', creative_work=clone, uri=article_uri))

    # Tag and subject links live on the preprint's attrs and are reached
    # through get_related().
    tag_links = []
    for tag in preprint.node.tags.values_list('name', flat=True):
        if tag:
            tag_node = GraphNode('tag', name=tag)
            tag_links.append(GraphNode('throughtags', creative_work=work, tag=tag_node))
    work.attrs['tags'] = tag_links

    # Distinct bepress_text values of the preprint's subjects.
    subject_names = set(s.bepress_text for s in preprint.subjects.all())
    subject_links = []
    for subject in subject_names:
        subject_node = GraphNode('subject', name=subject)
        subject_links.append(GraphNode('throughsubjects', creative_work=work, subject=subject_node))
    work.attrs['subjects'] = subject_links

    for index, user in enumerate(preprint.node.contributors):
        is_visible = preprint.node.get_visible(user)
        pending.append(format_contributor(work, user, is_visible, index))

    for institution in preprint.node.affiliated_institutions.values_list('name', flat=True):
        agent = GraphNode('institution', name=institution)
        pending.append(GraphNode('AgentWorkRelation', creative_work=work, agent=agent))

    seen = set()
    pending.extend(work.get_related())

    # First-in, first-out walk over everything reachable from the queued nodes.
    while pending:
        current = pending.pop(0)
        if current not in seen:
            seen.add(current)
            pending.extend(current.get_related())

    return [graph_node.serialize() for graph_node in seen]
Example #9
0
def format_preprint(preprint, share_type, old_subjects=None):
    """Build the graph describing ``preprint`` and return its serialized nodes.

    A GraphNode of type ``share_type`` is created for the preprint itself,
    together with nodes for its identifiers, tags, subjects, contributors and
    affiliated institutions. Every node reachable from those through
    ``get_related()`` is then collected once and serialized.

    :param preprint: the preprint to describe. Title, description, tags,
        contributors and institutions are read from ``preprint.node``.
    :param share_type: type name given to the GraphNode created for the preprint.
    :param old_subjects: optional iterable of ``Subject`` ids. Each one that is
        no longer among ``preprint.subjects`` is emitted as a
        ``throughsubjects`` node with ``is_deleted=True``.
    :return: list with one ``serialize()`` result per distinct node. The nodes
        are gathered in a set, so the order of the list is not meaningful.
    """
    if old_subjects is None:
        old_subjects = []
    from osf.models import Subject
    old_subjects = [Subject.objects.get(id=s) for s in old_subjects]
    preprint_graph = GraphNode(share_type, **{
        'title': preprint.node.title,
        'description': preprint.node.description or '',
        'is_deleted': (
            not preprint.verified_publishable or
            preprint.node.tags.filter(name='qatest').exists()
        ),
        # Note: Changing any preprint attribute that is pulled from the node, like title, will NOT bump
        # the preprint's date modified but will bump the node's date_modified.
        # We have to send the latest date to SHARE to actually get the result to be updated.
        # If we send a date_updated that is <= the one we previously sent, SHARE will ignore any changes
        # because it looks like a race condition that arose from preprints being resent to SHARE on
        # every step of preprint creation.
        'date_updated': max(preprint.modified, preprint.node.modified).isoformat(),
        'date_published': preprint.date_published.isoformat() if preprint.date_published else None
    })

    to_visit = [
        preprint_graph,
        GraphNode('workidentifier', creative_work=preprint_graph, uri=urlparse.urljoin(settings.DOMAIN, preprint._id + '/'))
    ]

    # Look the DOI identifier up once and reuse it, instead of calling
    # get_identifier() a second time just to read its value.
    doi_identifier = preprint.get_identifier('doi')
    if doi_identifier:
        to_visit.append(GraphNode('workidentifier', creative_work=preprint_graph, uri='http://dx.doi.org/{}'.format(doi_identifier.value)))

    if preprint.provider.domain_redirect_enabled:
        to_visit.append(GraphNode('workidentifier', creative_work=preprint_graph, uri=preprint.absolute_url))

    if preprint.article_doi:
        # Article DOI refers to a clone of this preprint on another system and therefore does not qualify as an identifier for this preprint
        related_work = GraphNode('creativework')
        to_visit.append(GraphNode('workrelation', subject=preprint_graph, related=related_work))
        to_visit.append(GraphNode('workidentifier', creative_work=related_work, uri='http://dx.doi.org/{}'.format(preprint.article_doi)))

    # Tags and subjects hang off the preprint node's attrs; they are picked up
    # by the get_related() walk below rather than being queued directly.
    preprint_graph.attrs['tags'] = [
        GraphNode('throughtags', creative_work=preprint_graph, tag=GraphNode('tag', name=tag))
        for tag in preprint.node.tags.values_list('name', flat=True) if tag
    ]

    current_subjects = [
        GraphNode('throughsubjects', creative_work=preprint_graph, is_deleted=False, subject=format_subject(s))
        for s in preprint.subjects.all()
    ]
    # Previously sent subjects that the preprint no longer has are sent again,
    # flagged as deleted.
    deleted_subjects = [
        GraphNode('throughsubjects', creative_work=preprint_graph, is_deleted=True, subject=format_subject(s))
        for s in old_subjects if not preprint.subjects.filter(id=s.id).exists()
    ]
    preprint_graph.attrs['subjects'] = current_subjects + deleted_subjects

    to_visit.extend(format_contributor(preprint_graph, user, preprint.node.get_visible(user), i) for i, user in enumerate(preprint.node.contributors))
    to_visit.extend(GraphNode('AgentWorkRelation', creative_work=preprint_graph, agent=GraphNode('institution', name=institution))
                    for institution in preprint.node.affiliated_institutions.values_list('name', flat=True))

    visited = set()
    to_visit.extend(preprint_graph.get_related())

    # Drain the work list in FIFO order; ``visited`` makes sure a node that is
    # reachable through several paths is only serialized once.
    while to_visit:
        n = to_visit.pop(0)
        if n in visited:
            continue
        visited.add(n)
        to_visit.extend(n.get_related())

    return [node.serialize() for node in visited]