def format_preprint(preprint):
    """Serialize a preprint and everything linked to it as a list of graph nodes.

    A root ``'preprint'`` GraphNode is created and connected to identifier,
    tag, subject, contributor and institution nodes. Every node reachable
    from those through ``get_related()`` is then collected and serialized.

    :param preprint: preprint record. Title, description, tags, contributors
        and affiliated institutions are read from ``preprint.node``.
    :return: list holding the ``serialize()`` result of each distinct node.
        Nodes are gathered in a set, so the order of the list should not be
        relied on.
    """
    preprint_graph = GraphNode('preprint', **{
        'title': preprint.node.title,
        'description': preprint.node.description or '',
        'is_deleted': (
            not preprint.is_published or
            not preprint.node.is_public or
            preprint.node.is_preprint_orphan or
            preprint.node.tags.filter(name='qatest').exists() or
            preprint.node.is_deleted
        ),
        'date_updated': preprint.date_modified.isoformat(),
        'date_published': preprint.date_published.isoformat() if preprint.date_published else None
    })

    to_visit = [
        preprint_graph,
        GraphNode('workidentifier', creative_work=preprint_graph,
                  uri=urlparse.urljoin(settings.DOMAIN, preprint._id + '/'))
    ]

    # Fetch the DOI identifier once: the guard and the formatted value must
    # come from the same lookup, otherwise a second lookup returning None
    # would fail on ``.value``.
    doi_identifier = preprint.get_identifier('doi')
    if doi_identifier:
        to_visit.append(GraphNode('workidentifier', creative_work=preprint_graph,
                                  uri='http://dx.doi.org/{}'.format(doi_identifier.value)))

    if preprint.provider.domain_redirect_enabled:
        to_visit.append(GraphNode('workidentifier', creative_work=preprint_graph, uri=preprint.absolute_url))

    if preprint.article_doi:
        # Article DOI refers to a clone of this preprint on another system and
        # therefore does not qualify as an identifier for this preprint
        related_work = GraphNode('creativework')
        to_visit.append(GraphNode('workrelation', subject=preprint_graph, related=related_work))
        to_visit.append(GraphNode('workidentifier', creative_work=related_work,
                                  uri='http://dx.doi.org/{}'.format(preprint.article_doi)))

    # Empty tag names are skipped.
    preprint_graph.attrs['tags'] = [
        GraphNode('throughtags', creative_work=preprint_graph, tag=GraphNode('tag', name=tag))
        for tag in preprint.node.tags.values_list('name', flat=True) if tag
    ]

    # The set collapses subjects that share the same ``bepress_text``.
    preprint_graph.attrs['subjects'] = [
        GraphNode('throughsubjects', creative_work=preprint_graph, subject=GraphNode('subject', name=subject))
        for subject in set(s.bepress_text for s in preprint.subjects.all())
    ]

    # The enumerate index is handed to format_contributor together with the
    # contributor's visibility on the node.
    to_visit.extend(
        format_contributor(preprint_graph, user, preprint.node.get_visible(user), i)
        for i, user in enumerate(preprint.node.contributors))
    to_visit.extend(
        GraphNode('AgentWorkRelation', creative_work=preprint_graph,
                  agent=GraphNode('institution', name=institution))
        for institution in preprint.node.affiliated_institutions.values_list('name', flat=True))

    # Walk the graph first-in-first-out, visiting each node only once.
    visited = set()
    to_visit.extend(preprint_graph.get_related())
    while to_visit:
        n = to_visit.pop(0)
        if n in visited:
            continue
        visited.add(n)
        to_visit.extend(n.get_related())

    return [node.serialize() for node in visited]
def format_registration(node):
    """Serialize a registration and everything linked to it as a list of graph nodes.

    A root ``'registration'`` GraphNode is created and connected to its
    identifier, tags, contributors and affiliated institutions. Every node
    reachable from those through ``get_related()`` is then collected and
    serialized.

    :param node: the registration to describe.
    :return: list holding the ``serialize()`` result of each distinct node.
        Nodes are gathered in a set, so the order of the list should not be
        relied on.
    """
    # A registration counts as a QA node when one of its tag names is listed
    # in DO_NOT_INDEX_LIST['tags'] or its title contains one of the
    # DO_NOT_INDEX_LIST['titles'] substrings; such nodes are flagged deleted.
    is_qa_node = bool(
        set(settings.DO_NOT_INDEX_LIST['tags']).intersection(node.tags.all().values_list('name', flat=True))
    ) or any(substring in node.title for substring in settings.DO_NOT_INDEX_LIST['titles'])

    # ``first()`` may return None (for example when ``registered_schema`` is a
    # related manager, whose truthiness does not reflect whether any schema
    # exists), so check the fetched schema itself before reading ``.name``.
    registered_schema = node.registered_schema.first() if node.registered_schema else None

    registration_graph = GraphNode('registration', **{
        'title': node.title,
        'description': node.description or '',
        'is_deleted': not node.is_public or node.is_deleted or is_qa_node,
        'date_published': node.registered_date.isoformat() if node.registered_date else None,
        'registration_type': registered_schema.name if registered_schema else None,
        'withdrawn': node.is_retracted,
        'justification': node.retraction.justification if node.retraction else None,
    })

    to_visit = [
        registration_graph,
        GraphNode('workidentifier', creative_work=registration_graph,
                  uri=urlparse.urljoin(settings.DOMAIN, node.url))
    ]

    # Tags without an ``_id`` are skipped.
    registration_graph.attrs['tags'] = [
        GraphNode('throughtags', creative_work=registration_graph, tag=GraphNode('tag', name=tag._id))
        for tag in node.tags.all() or [] if tag._id
    ]

    to_visit.extend(
        format_contributor(registration_graph, user, bool(user._id in node.visible_contributor_ids), i)
        for i, user in enumerate(node.contributors))
    to_visit.extend(
        GraphNode('AgentWorkRelation', creative_work=registration_graph,
                  agent=GraphNode('institution', name=institution.name))
        for institution in node.affiliated_institutions.all())

    # Walk the graph first-in-first-out, visiting each node only once.
    visited = set()
    to_visit.extend(registration_graph.get_related())
    while to_visit:
        n = to_visit.pop(0)
        if n in visited:
            continue
        visited.add(n)
        to_visit.extend(n.get_related())

    return [node_.serialize() for node_ in visited]
def format_registration(node):
    """Return the serialized graph nodes that describe a registration.

    The root ``'registration'`` node is linked to a work identifier, tags,
    contributors and affiliated institutions; all nodes reachable through
    ``get_related()`` are serialized.

    :param node: the registration to describe.
    :return: list of ``serialize()`` results, one per distinct graph node
        (collected in a set, so no particular order).
    """
    # TODO: Add parent and root info?
    attributes = {
        'title': node.title,
        'description': node.description or '',
        'is_deleted': not node.is_public or 'qatest' in (node.tags or []) or node.is_deleted,
        'date_published': node.registered_date.isoformat() if node.registered_date else None,
        'registration_type': node.registered_schema[0].name if node.registered_schema else None,
        'withdrawn': node.is_retracted,  # TODO: Should this recurse up to the node's root or nah?
        'justification': node.retraction.justification if node.retraction else None,
    }
    registration_graph = GraphNode('registration', **attributes)

    identifier = GraphNode(
        'workidentifier',
        creative_work=registration_graph,
        uri=urlparse.urljoin(settings.DOMAIN, node.url),
    )
    pending = [registration_graph, identifier]

    # One 'throughtags' node per tag that has an ``_id``.
    tag_nodes = []
    for tag in node.tags or []:
        if tag._id:
            tag_nodes.append(
                GraphNode('throughtags', creative_work=registration_graph,
                          tag=GraphNode('tag', name=tag._id)))
    registration_graph.attrs['tags'] = tag_nodes

    for position, contributor in enumerate(node.contributors):
        is_visible = contributor._id in node.visible_contributor_ids
        pending.append(format_contributor(registration_graph, contributor, is_visible, position))

    for institution in node.affiliated_institutions:
        pending.append(
            GraphNode('AgentWorkRelation', creative_work=registration_graph,
                      agent=GraphNode('institution', name=institution.name)))

    # First-in-first-out walk over the graph; each node is kept only once.
    seen = set()
    pending.extend(registration_graph.get_related())
    while pending:
        current = pending.pop(0)
        if current not in seen:
            seen.add(current)
            pending.extend(list(current.get_related()))

    return [graph_node.serialize() for graph_node in seen]
def format_registration(node):
    """Serialize a registration and everything linked to it as a list of graph nodes.

    A root ``'registration'`` GraphNode is created and connected to its
    identifier, tags, contributors, affiliated institutions and, when the
    registration has a parent, to a node standing for that parent. Every node
    reachable from those through ``get_related()`` is then collected and
    serialized.

    :param node: the registration to describe.
    :return: list holding the ``serialize()`` result of each distinct node.
        Nodes are gathered in a set, so the order of the list should not be
        relied on.
    """
    # A registration counts as a QA node when one of its tag names is listed
    # in DO_NOT_INDEX_LIST['tags'] or its title contains one of the
    # DO_NOT_INDEX_LIST['titles'] substrings; such nodes are flagged deleted.
    is_qa_node = bool(
        set(settings.DO_NOT_INDEX_LIST['tags']).intersection(node.tags.all().values_list('name', flat=True))
    ) or any(substring in node.title for substring in settings.DO_NOT_INDEX_LIST['titles'])

    # ``first()`` may return None (for example when ``registered_schema`` is a
    # related manager, whose truthiness does not reflect whether any schema
    # exists), so check the fetched schema itself before reading ``.name``.
    registered_schema = node.registered_schema.first() if node.registered_schema else None

    registration_graph = GraphNode('registration', **{
        'title': node.title,
        'description': node.description or '',
        'is_deleted': not node.is_public or node.is_deleted or is_qa_node,
        'date_published': node.registered_date.isoformat() if node.registered_date else None,
        'registration_type': registered_schema.name if registered_schema else None,
        'withdrawn': node.is_retracted,
        'justification': node.retraction.justification if node.retraction else None,
    })

    to_visit = [
        registration_graph,
        GraphNode('workidentifier', creative_work=registration_graph,
                  uri=urlparse.urljoin(settings.DOMAIN, node.url))
    ]

    # Tags without an ``_id`` are skipped.
    registration_graph.attrs['tags'] = [
        GraphNode('throughtags', creative_work=registration_graph, tag=GraphNode('tag', name=tag._id))
        for tag in node.tags.all() or [] if tag._id
    ]

    to_visit.extend(
        format_contributor(registration_graph, user, bool(user._id in node.visible_contributor_ids), i)
        for i, user in enumerate(node.contributors))
    to_visit.extend(
        GraphNode('AgentWorkRelation', creative_work=registration_graph,
                  agent=GraphNode('institution', name=institution.name))
        for institution in node.affiliated_institutions.all())

    # Read the parent once so the guard and the URL use the same object.
    parent_node = node.parent_node
    if parent_node:
        # The parent is represented by a bare 'registration' node that carries
        # only its identifier, joined to this registration by 'ispartof'.
        parent = GraphNode('registration')
        to_visit.extend([
            parent,
            GraphNode('workidentifier', creative_work=parent,
                      uri=urlparse.urljoin(settings.DOMAIN, parent_node.url)),
            GraphNode('ispartof', subject=registration_graph, related=parent),
        ])

    # Walk the graph first-in-first-out, visiting each node only once.
    visited = set()
    to_visit.extend(registration_graph.get_related())
    while to_visit:
        n = to_visit.pop(0)
        if n in visited:
            continue
        visited.add(n)
        to_visit.extend(n.get_related())

    return [node_.serialize() for node_ in visited]
def format_preprint(preprint, share_type, old_subjects=None):
    """Serialize a preprint and everything linked to it as a list of graph nodes.

    The root GraphNode is connected to identifier, tag, subject, contributor
    and institution nodes. Every node reachable from those through
    ``get_related()`` is then collected and serialized.

    :param preprint: preprint record. Title, description, tags, contributors
        and affiliated institutions are read from ``preprint.node``.
    :param share_type: first positional argument given to ``GraphNode`` for
        the root node.
    :param old_subjects: optional iterable of ``Subject`` ids. Those that are
        no longer attached to the preprint are emitted as ``throughsubjects``
        nodes with ``is_deleted=True``. Defaults to no old subjects.
    :return: list holding the ``serialize()`` result of each distinct node.
        Nodes are gathered in a set, so the order of the list should not be
        relied on.
    """
    if old_subjects is None:
        old_subjects = []
    from osf.models import Subject
    old_subjects = [Subject.objects.get(id=s) for s in old_subjects]
    preprint_graph = GraphNode(share_type, **{
        'title': preprint.node.title,
        'description': preprint.node.description or '',
        'is_deleted': (
            not preprint.verified_publishable or
            preprint.node.tags.filter(name='qatest').exists()
        ),
        # Note: Changing any preprint attribute that is pulled from the node, like title, will NOT bump
        # the preprint's date modified but will bump the node's date_modified.
        # We have to send the latest date to SHARE to actually get the result to be updated.
        # If we send a date_updated that is <= the one we previously sent, SHARE will ignore any changes
        # because it looks like a race condition that arose from preprints being resent to SHARE on
        # every step of preprint creation.
        'date_updated': max(preprint.modified, preprint.node.modified).isoformat(),
        'date_published': preprint.date_published.isoformat() if preprint.date_published else None
    })

    to_visit = [
        preprint_graph,
        GraphNode('workidentifier', creative_work=preprint_graph,
                  uri=urlparse.urljoin(settings.DOMAIN, preprint._id + '/'))
    ]

    # Fetch the DOI identifier once: the guard and the formatted value must
    # come from the same lookup, otherwise a second lookup returning None
    # would fail on ``.value``.
    doi_identifier = preprint.get_identifier('doi')
    if doi_identifier:
        to_visit.append(GraphNode('workidentifier', creative_work=preprint_graph,
                                  uri='http://dx.doi.org/{}'.format(doi_identifier.value)))

    if preprint.provider.domain_redirect_enabled:
        to_visit.append(GraphNode('workidentifier', creative_work=preprint_graph, uri=preprint.absolute_url))

    if preprint.article_doi:
        # Article DOI refers to a clone of this preprint on another system and
        # therefore does not qualify as an identifier for this preprint
        related_work = GraphNode('creativework')
        to_visit.append(GraphNode('workrelation', subject=preprint_graph, related=related_work))
        to_visit.append(GraphNode('workidentifier', creative_work=related_work,
                                  uri='http://dx.doi.org/{}'.format(preprint.article_doi)))

    # Empty tag names are skipped.
    preprint_graph.attrs['tags'] = [
        GraphNode('throughtags', creative_work=preprint_graph, tag=GraphNode('tag', name=tag))
        for tag in preprint.node.tags.values_list('name', flat=True) if tag
    ]

    current_subjects = [
        GraphNode('throughsubjects', creative_work=preprint_graph, is_deleted=False, subject=format_subject(s))
        for s in preprint.subjects.all()
    ]
    # Old subjects that are not attached to the preprint any more are sent
    # with is_deleted=True.
    deleted_subjects = [
        GraphNode('throughsubjects', creative_work=preprint_graph, is_deleted=True, subject=format_subject(s))
        for s in old_subjects if not preprint.subjects.filter(id=s.id).exists()
    ]
    preprint_graph.attrs['subjects'] = current_subjects + deleted_subjects

    to_visit.extend(
        format_contributor(preprint_graph, user, preprint.node.get_visible(user), i)
        for i, user in enumerate(preprint.node.contributors))
    to_visit.extend(
        GraphNode('AgentWorkRelation', creative_work=preprint_graph,
                  agent=GraphNode('institution', name=institution))
        for institution in preprint.node.affiliated_institutions.values_list('name', flat=True))

    # Walk the graph first-in-first-out, visiting each node only once.
    visited = set()
    to_visit.extend(preprint_graph.get_related())
    while to_visit:
        n = to_visit.pop(0)
        if n in visited:
            continue
        visited.add(n)
        to_visit.extend(n.get_related())

    return [node.serialize() for node in visited]
def format_preprint(preprint, share_type, old_subjects=None):
    """Serialize a preprint and everything linked to it as a list of graph nodes.

    The root GraphNode is connected to identifier, tag, subject and
    contributor nodes. Every node reachable from those through
    ``get_related()`` is then collected and serialized.

    :param preprint: preprint record; title, description, tags and
        contributors are read directly from it.
    :param share_type: first positional argument given to ``GraphNode`` for
        the root node.
    :param old_subjects: optional iterable of ``Subject`` ids. Those that are
        no longer attached to the preprint are emitted as ``throughsubjects``
        nodes with ``is_deleted=True``. Defaults to no old subjects.
    :return: list holding the ``serialize()`` result of each distinct node.
        Nodes are gathered in a set, so the order of the list should not be
        relied on.
    """
    if old_subjects is None:
        old_subjects = []
    from osf.models import Subject
    old_subjects = [Subject.objects.get(id=s) for s in old_subjects]
    preprint_graph = GraphNode(share_type, **{
        'title': preprint.title,
        'description': preprint.description or '',
        # A retracted preprint is not flagged deleted merely for failing
        # ``verified_publishable``; a 'qatest' tag always flags it.
        'is_deleted': (
            (not preprint.verified_publishable and not preprint.is_retracted) or
            preprint.tags.filter(name='qatest').exists()
        ),
        'date_updated': preprint.modified.isoformat(),
        'date_published': preprint.date_published.isoformat() if preprint.date_published else None
    })

    to_visit = [
        preprint_graph,
        GraphNode('workidentifier', creative_work=preprint_graph,
                  uri=urlparse.urljoin(settings.DOMAIN, preprint._id + '/'))
    ]

    # Fetch the DOI identifier once: the guard and the formatted value must
    # come from the same lookup, otherwise a second lookup returning None
    # would fail on ``.value``.
    doi_identifier = preprint.get_identifier('doi')
    if doi_identifier:
        to_visit.append(GraphNode('workidentifier', creative_work=preprint_graph,
                                  uri='https://doi.org/{}'.format(doi_identifier.value)))

    if preprint.provider.domain_redirect_enabled:
        to_visit.append(GraphNode('workidentifier', creative_work=preprint_graph, uri=preprint.absolute_url))

    if preprint.article_doi:
        # Article DOI refers to a clone of this preprint on another system and
        # therefore does not qualify as an identifier for this preprint
        related_work = GraphNode('creativework')
        to_visit.append(GraphNode('workrelation', subject=preprint_graph, related=related_work))
        to_visit.append(GraphNode('workidentifier', creative_work=related_work,
                                  uri='https://doi.org/{}'.format(preprint.article_doi)))

    # Empty tag names are skipped.
    preprint_graph.attrs['tags'] = [
        GraphNode('throughtags', creative_work=preprint_graph, tag=GraphNode('tag', name=tag))
        for tag in preprint.tags.values_list('name', flat=True) if tag
    ]

    current_subjects = [
        GraphNode('throughsubjects', creative_work=preprint_graph, is_deleted=False, subject=format_subject(s))
        for s in preprint.subjects.all()
    ]
    # Old subjects that are not attached to the preprint any more are sent
    # with is_deleted=True.
    deleted_subjects = [
        GraphNode('throughsubjects', creative_work=preprint_graph, is_deleted=True, subject=format_subject(s))
        for s in old_subjects if not preprint.subjects.filter(id=s.id).exists()
    ]
    preprint_graph.attrs['subjects'] = current_subjects + deleted_subjects

    to_visit.extend(
        format_contributor(preprint_graph, user, preprint.get_visible(user), i)
        for i, user in enumerate(preprint.contributors))

    # Walk the graph first-in-first-out, visiting each node only once.
    visited = set()
    to_visit.extend(preprint_graph.get_related())
    while to_visit:
        n = to_visit.pop(0)
        if n in visited:
            continue
        visited.add(n)
        to_visit.extend(n.get_related())

    return [node.serialize() for node in visited]
def format_preprint(preprint):
    """Return the serialized graph nodes that describe a preprint.

    The root ``'preprint'`` node is linked to identifiers, tags, subjects,
    contributors and affiliated institutions; all nodes reachable through
    ``get_related()`` are serialized.

    :param preprint: preprint record. Title, description, tags, contributors
        and institutions are read from ``preprint.node``.
    :return: list of ``serialize()`` results, one per distinct graph node
        (collected in a set, so no particular order).
    """
    attributes = {
        'title': preprint.node.title,
        'description': preprint.node.description or '',
        'is_deleted': (
            not preprint.is_published
            or not preprint.node.is_public
            or preprint.node.is_preprint_orphan
            or 'qatest' in (preprint.node.tags or [])
            or preprint.node.is_deleted
        ),
        'date_updated': preprint.date_modified.isoformat(),
        'date_published': preprint.date_published.isoformat() if preprint.date_published else None
    }
    preprint_graph = GraphNode('preprint', **attributes)

    own_identifier = GraphNode(
        'workidentifier',
        creative_work=preprint_graph,
        uri=urlparse.urljoin(settings.DOMAIN, preprint.url),
    )
    pending = [preprint_graph, own_identifier]

    if preprint.article_doi:
        doi_uri = 'http://dx.doi.org/{}'.format(preprint.article_doi)
        pending.append(GraphNode('workidentifier', creative_work=preprint_graph, uri=doi_uri))

    # One 'throughtags' node per tag that has an ``_id``.
    tag_nodes = []
    for tag in preprint.node.tags or []:
        if tag._id:
            tag_nodes.append(
                GraphNode('throughtags', creative_work=preprint_graph,
                          tag=GraphNode('tag', name=tag._id)))
    preprint_graph.attrs['tags'] = tag_nodes

    # Flatten every subject hierarchy into a set of distinct 'text' values.
    subject_names = set(
        entry['text']
        for hierarchy in preprint.get_subjects() or []
        for entry in hierarchy
    )
    preprint_graph.attrs['subjects'] = [
        GraphNode('throughsubjects', creative_work=preprint_graph,
                  subject=GraphNode('subject', name=subject_name))
        for subject_name in subject_names
        if subject_name
    ]

    for position, contributor in enumerate(preprint.node.contributors):
        is_visible = contributor._id in preprint.node.visible_contributor_ids
        pending.append(format_contributor(preprint_graph, contributor, is_visible, position))

    for institution in preprint.node.affiliated_institutions:
        pending.append(
            GraphNode('AgentWorkRelation', creative_work=preprint_graph,
                      agent=GraphNode('institution', name=institution.name)))

    # First-in-first-out walk over the graph; each node is kept only once.
    seen = set()
    pending.extend(preprint_graph.get_related())
    while pending:
        current = pending.pop(0)
        if current not in seen:
            seen.add(current)
            pending.extend(list(current.get_related()))

    return [graph_node.serialize() for graph_node in seen]
def format_preprint(preprint):
    """Return the serialized graph nodes that describe a preprint.

    The root ``'preprint'`` node is linked to identifiers, tags, subjects,
    contributors and affiliated institutions; all nodes reachable through
    ``get_related()`` are serialized.

    :param preprint: preprint record. Title, description, tags, contributors
        and institutions are read from ``preprint.node``.
    :return: list of ``serialize()`` results, one per distinct graph node
        (collected in a set, so no particular order).
    """
    attributes = {
        'title': preprint.node.title,
        'description': preprint.node.description or '',
        'is_deleted': (
            not preprint.is_published
            or not preprint.node.is_public
            or preprint.node.is_preprint_orphan
            or preprint.node.tags.filter(name='qatest').exists()
            or preprint.node.is_deleted
        ),
        'date_updated': preprint.date_modified.isoformat(),
        'date_published': preprint.date_published.isoformat() if preprint.date_published else None
    }
    preprint_graph = GraphNode('preprint', **attributes)

    own_identifier = GraphNode(
        'workidentifier',
        creative_work=preprint_graph,
        uri=urlparse.urljoin(settings.DOMAIN, preprint._id + '/'),
    )
    pending = [preprint_graph, own_identifier]

    if preprint.provider.domain_redirect_enabled:
        pending.append(
            GraphNode('workidentifier', creative_work=preprint_graph, uri=preprint.absolute_url))

    if preprint.article_doi:
        # Article DOI refers to a clone of this preprint on another system and
        # therefore does not qualify as an identifier for this preprint
        related_work = GraphNode('creativework')
        relation = GraphNode('workrelation', subject=preprint_graph, related=related_work)
        pending.append(relation)
        related_identifier = GraphNode(
            'workidentifier',
            creative_work=related_work,
            uri='http://dx.doi.org/{}'.format(preprint.article_doi),
        )
        pending.append(related_identifier)

    # One 'throughtags' node per non-empty tag name.
    tag_nodes = []
    for tag_name in preprint.node.tags.values_list('name', flat=True):
        if tag_name:
            tag_nodes.append(
                GraphNode('throughtags', creative_work=preprint_graph,
                          tag=GraphNode('tag', name=tag_name)))
    preprint_graph.attrs['tags'] = tag_nodes

    # Subjects sharing the same ``bepress_text`` collapse into one entry.
    subject_names = set(s.bepress_text for s in preprint.subjects.all())
    preprint_graph.attrs['subjects'] = [
        GraphNode('throughsubjects', creative_work=preprint_graph,
                  subject=GraphNode('subject', name=subject_name))
        for subject_name in subject_names
    ]

    for position, contributor in enumerate(preprint.node.contributors):
        pending.append(
            format_contributor(preprint_graph, contributor,
                               preprint.node.get_visible(contributor), position))

    for institution_name in preprint.node.affiliated_institutions.values_list('name', flat=True):
        pending.append(
            GraphNode('AgentWorkRelation', creative_work=preprint_graph,
                      agent=GraphNode('institution', name=institution_name)))

    # First-in-first-out walk over the graph; each node is kept only once.
    seen = set()
    pending.extend(preprint_graph.get_related())
    while pending:
        current = pending.pop(0)
        if current not in seen:
            seen.add(current)
            pending.extend(list(current.get_related()))

    return [graph_node.serialize() for graph_node in seen]
def format_preprint(preprint, share_type, old_subjects=None):
    """Serialize a preprint and everything linked to it as a list of graph nodes.

    The root GraphNode is connected to identifier, tag, subject, contributor
    and institution nodes. Every node reachable from those through
    ``get_related()`` is then collected and serialized.

    :param preprint: preprint record. Title, description, tags, contributors
        and affiliated institutions are read from ``preprint.node``.
    :param share_type: first positional argument given to ``GraphNode`` for
        the root node.
    :param old_subjects: optional iterable of ``Subject`` ids. Those that are
        no longer attached to the preprint are emitted as ``throughsubjects``
        nodes with ``is_deleted=True``. Defaults to no old subjects.
    :return: list holding the ``serialize()`` result of each distinct node.
        Nodes are gathered in a set, so the order of the list should not be
        relied on.
    """
    if old_subjects is None:
        old_subjects = []
    from osf.models import Subject
    old_subjects = [Subject.objects.get(id=s) for s in old_subjects]
    preprint_graph = GraphNode(share_type, **{
        'title': preprint.node.title,
        'description': preprint.node.description or '',
        'is_deleted': (
            not preprint.verified_publishable or
            preprint.node.tags.filter(name='qatest').exists()
        ),
        # Note: Changing any preprint attribute that is pulled from the node, like title, will NOT bump
        # the preprint's date modified but will bump the node's date_modified.
        # We have to send the latest date to SHARE to actually get the result to be updated.
        # If we send a date_updated that is <= the one we previously sent, SHARE will ignore any changes
        # because it looks like a race condition that arose from preprints being resent to SHARE on
        # every step of preprint creation.
        'date_updated': max(preprint.modified, preprint.node.modified).isoformat(),
        'date_published': preprint.date_published.isoformat() if preprint.date_published else None
    })

    to_visit = [
        preprint_graph,
        GraphNode('workidentifier', creative_work=preprint_graph,
                  uri=urlparse.urljoin(settings.DOMAIN, preprint._id + '/'))
    ]

    # Fetch the DOI identifier once: the guard and the formatted value must
    # come from the same lookup, otherwise a second lookup returning None
    # would fail on ``.value``.
    doi_identifier = preprint.get_identifier('doi')
    if doi_identifier:
        to_visit.append(GraphNode('workidentifier', creative_work=preprint_graph,
                                  uri='http://dx.doi.org/{}'.format(doi_identifier.value)))

    if preprint.provider.domain_redirect_enabled:
        to_visit.append(GraphNode('workidentifier', creative_work=preprint_graph, uri=preprint.absolute_url))

    if preprint.article_doi:
        # Article DOI refers to a clone of this preprint on another system and
        # therefore does not qualify as an identifier for this preprint
        related_work = GraphNode('creativework')
        to_visit.append(GraphNode('workrelation', subject=preprint_graph, related=related_work))
        to_visit.append(GraphNode('workidentifier', creative_work=related_work,
                                  uri='http://dx.doi.org/{}'.format(preprint.article_doi)))

    # Empty tag names are skipped.
    preprint_graph.attrs['tags'] = [
        GraphNode('throughtags', creative_work=preprint_graph, tag=GraphNode('tag', name=tag))
        for tag in preprint.node.tags.values_list('name', flat=True) if tag
    ]

    current_subjects = [
        GraphNode('throughsubjects', creative_work=preprint_graph, is_deleted=False, subject=format_subject(s))
        for s in preprint.subjects.all()
    ]
    # Old subjects that are not attached to the preprint any more are sent
    # with is_deleted=True.
    deleted_subjects = [
        GraphNode('throughsubjects', creative_work=preprint_graph, is_deleted=True, subject=format_subject(s))
        for s in old_subjects if not preprint.subjects.filter(id=s.id).exists()
    ]
    preprint_graph.attrs['subjects'] = current_subjects + deleted_subjects

    to_visit.extend(
        format_contributor(preprint_graph, user, preprint.node.get_visible(user), i)
        for i, user in enumerate(preprint.node.contributors))
    to_visit.extend(
        GraphNode('AgentWorkRelation', creative_work=preprint_graph,
                  agent=GraphNode('institution', name=institution))
        for institution in preprint.node.affiliated_institutions.values_list('name', flat=True))

    # Walk the graph first-in-first-out, visiting each node only once.
    visited = set()
    to_visit.extend(preprint_graph.get_related())
    while to_visit:
        n = to_visit.pop(0)
        if n in visited:
            continue
        visited.add(n)
        to_visit.extend(n.get_related())

    return [node.serialize() for node in visited]