def update_wiki_pages(nodes):
    for i, node in enumerate(nodes):
        if node['wiki_pages_versions']:
            cloned_wiki_pages = {}
            for key, wiki_versions in node['wiki_pages_versions'].items():
                cloned_wiki_pages[key] = []
                for wiki_id in wiki_versions:
                    node_wiki = NodeWikiPage.load(wiki_id)
                    if not node_wiki:
                        continue
                    if node_wiki.to_storage()['node'] != node['_id']:
                        if not node_wiki.node:
                            move_to_backup_collection(node_wiki)
                            continue
                        clone = node_wiki.clone_wiki(node['_id'])
                        logger.info('Cloned wiki page {} from node {} to {}'.format(wiki_id, node_wiki.node, node['_id']))
                        cloned_wiki_pages[key].append(clone._id)
                        # update current wiki page
                        if node_wiki.is_current:
                            wiki_pages_current = node['wiki_pages_current']
                            wiki_pages_current[key] = clone._id
                            db.node.update(
                                {'_id': node['_id']},
                                {'$set': {'wiki_pages_current': wiki_pages_current}}
                            )
                    else:
                        cloned_wiki_pages[key].append(wiki_id)
            db.node.update(
                {'_id': node['_id']},
                {'$set': {'wiki_pages_versions': cloned_wiki_pages}}
            )
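
# A minimal sketch of how this migration might be driven. The tests further down
# call main(); get_nodes() here is a hypothetical helper standing in for however
# the script actually queries the raw `node` collection, and the query shape is
# an assumption, not the original code.
def get_nodes():
    # update_wiki_pages() itself skips nodes with empty wiki_pages_versions,
    # so iterating every node document is sufficient for this sketch.
    return db.node.find()

def main():
    update_wiki_pages(get_nodes())

if __name__ == '__main__':
    main()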
def update_node(node, index='website'):
    from website.addons.wiki.model import NodeWikiPage

    component_categories = ['', 'hypothesis', 'methods and measures', 'procedure',
                            'instrumentation', 'data', 'analysis', 'communication', 'other']
    category = 'component' if node.category in component_categories else node.category

    if category == 'project':
        elastic_document_id = node._id
        parent_id = None
        category = 'registration' if node.is_registration else category
    else:
        try:
            elastic_document_id = node._id
            parent_id = node.parent_id
            category = 'registration' if node.is_registration else category
        except IndexError:
            # Skip orphaned components
            return

    if node.is_deleted or not node.is_public:
        delete_doc(elastic_document_id, node)
    else:
        try:
            normalized_title = six.u(node.title)
        except TypeError:
            normalized_title = node.title
        normalized_title = unicodedata.normalize('NFKD', normalized_title).encode('ascii', 'ignore')

        elastic_document = {
            'id': elastic_document_id,
            'contributors': [
                x.fullname for x in node.visible_contributors
                if x is not None and x.is_active
            ],
            'contributors_url': [
                x.profile_url for x in node.visible_contributors
                if x is not None and x.is_active
            ],
            'title': node.title,
            'normalized_title': normalized_title,
            'category': category,
            'public': node.is_public,
            'tags': [tag._id for tag in node.tags if tag],
            'description': node.description,
            'url': node.url,
            'is_registration': node.is_registration,
            'registered_date': str(node.registered_date)[:10],
            'wikis': {},
            'parent_id': parent_id,
            'iso_timestamp': node.date_created,
            'boost': int(not node.is_registration) + 1,  # This is for making registered projects less relevant
        }
        for wiki in [
            NodeWikiPage.load(x)
            for x in node.wiki_pages_current.values()
        ]:
            elastic_document['wikis'][wiki.page_name] = wiki.raw_text(node)

        es.index(index=index, doc_type=category, id=elastic_document_id, body=elastic_document, refresh=True)
def update_node(node, index=INDEX):
    from website.addons.wiki.model import NodeWikiPage

    component_categories = ['', 'hypothesis', 'methods and measures', 'procedure',
                            'instrumentation', 'data', 'analysis', 'communication', 'other']
    category = 'component' if node.category in component_categories else node.category

    if category == 'project':
        elastic_document_id = node._id
        parent_id = None
        category = 'registration' if node.is_registration else category
    else:
        try:
            elastic_document_id = node._id
            parent_id = node.parent_id
            category = 'registration' if node.is_registration else category
        except IndexError:
            # Skip orphaned components
            return

    if node.is_deleted or not node.is_public:
        delete_doc(elastic_document_id, node)
    else:
        try:
            normalized_title = six.u(node.title)
        except TypeError:
            normalized_title = node.title
        normalized_title = unicodedata.normalize('NFKD', normalized_title).encode('ascii', 'ignore')

        elastic_document = {
            'id': elastic_document_id,
            'contributors': [
                x.fullname for x in node.visible_contributors
                if x is not None
            ],
            'contributors_url': [
                x.profile_url for x in node.visible_contributors
                if x is not None and x.is_active
            ],
            'title': node.title,
            'normalized_title': normalized_title,
            'category': category,
            'public': node.is_public,
            'tags': [tag._id for tag in node.tags if tag],
            'description': node.description,
            'url': node.url,
            'is_registration': node.is_registration,
            'registered_date': str(node.registered_date)[:10],
            'wikis': {},
            'parent_id': parent_id,
            'iso_timestamp': node.date_created,
            'boost': int(not node.is_registration) + 1,  # This is for making registered projects less relevant
        }
        for wiki in [
            NodeWikiPage.load(x)
            for x in node.wiki_pages_current.values()
        ]:
            elastic_document['wikis'][wiki.page_name] = wiki.raw_text(node)

        es.index(index=index, doc_type=category, id=elastic_document_id, body=elastic_document, refresh=True)
def update_node(node, index=None, bulk=False):
    index = index or INDEX
    from website.addons.wiki.model import NodeWikiPage

    category = get_doctype_from_node(node)

    elastic_document_id = node._id
    parent_id = node.parent_id

    from website.files.models.osfstorage import OsfStorageFile
    for file_ in paginated(OsfStorageFile, Q("node", "eq", node)):
        update_file(file_, index=index)

    if node.is_deleted or not node.is_public or node.archiving:
        delete_doc(elastic_document_id, node, index=index)
    else:
        try:
            normalized_title = six.u(node.title)
        except TypeError:
            normalized_title = node.title
        normalized_title = unicodedata.normalize("NFKD", normalized_title).encode("ascii", "ignore")

        elastic_document = {
            "id": elastic_document_id,
            "contributors": [
                {"fullname": x.fullname, "url": x.profile_url if x.is_active else None}
                for x in node.visible_contributors
                if x is not None
            ],
            "title": node.title,
            "normalized_title": normalized_title,
            "category": category,
            "public": node.is_public,
            "tags": [tag._id for tag in node.tags if tag],
            "description": node.description,
            "url": node.url,
            "is_registration": node.is_registration,
            "is_pending_registration": node.is_pending_registration,
            "is_retracted": node.is_retracted,
            "is_pending_retraction": node.is_pending_retraction,
            "embargo_end_date": node.embargo_end_date.strftime("%A, %b. %d, %Y") if node.embargo_end_date else False,
            "is_pending_embargo": node.is_pending_embargo,
            "registered_date": node.registered_date,
            "wikis": {},
            "parent_id": parent_id,
            "date_created": node.date_created,
            "license": serialize_node_license_record(node.license),
            "affiliated_institutions": [inst.name for inst in node.affiliated_institutions],
            "boost": int(not node.is_registration) + 1,  # This is for making registered projects less relevant
        }
        if not node.is_retracted:
            for wiki in [NodeWikiPage.load(x) for x in node.wiki_pages_current.values()]:
                elastic_document["wikis"][wiki.page_name] = wiki.raw_text(node)

        if bulk:
            return elastic_document
        else:
            es.index(index=index, doc_type=category, id=elastic_document_id, body=elastic_document, refresh=True)
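
# A minimal sketch (not OSF code) of how the bulk=True path above could be used:
# update_node(..., bulk=True) returns the serialized document instead of indexing
# it, so a caller can batch many nodes into a single elasticsearch.helpers.bulk()
# call. bulk_update_nodes() and its `nodes` argument are assumptions for this
# illustration; `es`, INDEX, and get_doctype_from_node come from the surrounding module.
from elasticsearch import helpers

def bulk_update_nodes(nodes, index=None):
    index = index or INDEX
    actions = []
    for node in nodes:
        doc = update_node(node, index=index, bulk=True)
        if doc is None:
            # deleted/private/archiving nodes are deleted from the index, not returned
            continue
        actions.append({
            '_op_type': 'index',
            '_index': index,
            '_type': get_doctype_from_node(node),
            '_id': doc['id'],
            '_source': doc,
        })
    if actions:
        helpers.bulk(es, actions)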
def test_old_wiki_versions_not_returned(self):
    self._set_up_public_project_with_wiki_page()
    current_wiki = NodeWikiFactory(node=self.public_project, user=self.user)
    old_version_id = self.public_project.wiki_pages_versions[current_wiki.page_name][-2]
    old_version = NodeWikiPage.load(old_version_id)
    url = '/{}wikis/{}/'.format(API_BASE, old_version._id)
    res = self.app.get(url, expect_errors=True)
    assert_equal(res.status_code, 404)
def test_registration_wiki_pages_created_pre_registration_get_cloned(self):
    project = self.set_up_project_with_wiki_page()
    registration = project.register_node(get_default_metaschema(), Auth(self.user), "", None)
    # reset wiki pages for test
    registration.wiki_pages_versions = project.wiki_pages_versions
    registration.wiki_pages_current = project.wiki_pages_current
    registration.save()

    main()
    registration.reload()

    wiki_versions = registration.wiki_pages_versions[self.wiki.page_name]
    current_wiki = NodeWikiPage.load(registration.wiki_pages_current[self.current_wiki.page_name])
    assert_equal(current_wiki.node, registration)
    assert_not_equal(current_wiki._id, self.current_wiki._id)

    wiki_version = NodeWikiPage.load(wiki_versions[0])
    assert_equal(wiki_version.node, registration)
    assert_not_equal(wiki_version._id, self.current_wiki._id)
def test_forked_project_wiki_pages_created_pre_fork_get_cloned(self):
    project = self.set_up_project_with_wiki_page()
    fork = project.fork_node(auth=Auth(self.user))
    # reset wiki pages for test
    fork.wiki_pages_versions = project.wiki_pages_versions
    fork.wiki_pages_current = project.wiki_pages_current
    fork.save()

    main()
    # update_wiki_pages(self.find_node_record(fork._id))
    fork.reload()

    wiki_versions = fork.wiki_pages_versions[self.wiki.page_name]
    current_wiki = NodeWikiPage.load(fork.wiki_pages_current[self.current_wiki.page_name])
    assert_equal(current_wiki.node, fork)
    assert_not_equal(current_wiki._id, self.current_wiki._id)

    wiki_version = NodeWikiPage.load(wiki_versions[0])
    assert_equal(wiki_version.node, fork)
    assert_not_equal(wiki_version._id, self.current_wiki._id)
def serialize_node(node, category):
    from website.addons.wiki.model import NodeWikiPage

    elastic_document = {}
    parent_id = node.parent_id

    try:
        normalized_title = six.u(node.title)
    except TypeError:
        normalized_title = node.title
    normalized_title = unicodedata.normalize('NFKD', normalized_title).encode('ascii', 'ignore')

    elastic_document = {
        'id': node._id,
        'contributors': [
            {
                'fullname': x.fullname,
                'url': x.profile_url if x.is_active else None
            }
            for x in node.visible_contributors
            if x is not None
        ],
        'title': node.title,
        'normalized_title': normalized_title,
        'category': category,
        'public': node.is_public,
        'tags': [tag._id for tag in node.tags if tag],
        'description': node.description,
        'url': node.url,
        'is_registration': node.is_registration,
        'is_pending_registration': node.is_pending_registration,
        'is_retracted': node.is_retracted,
        'is_pending_retraction': node.is_pending_retraction,
        'embargo_end_date': node.embargo_end_date.strftime('%A, %b. %d, %Y') if node.embargo_end_date else False,
        'is_pending_embargo': node.is_pending_embargo,
        'registered_date': node.registered_date,
        'wikis': {},
        'parent_id': parent_id,
        'date_created': node.date_created,
        'license': serialize_node_license_record(node.license),
        'affiliated_institutions': [inst.name for inst in node.affiliated_institutions],
        'boost': int(not node.is_registration) + 1,  # This is for making registered projects less relevant
        'extra_search_terms': clean_splitters(node.title),
    }
    if not node.is_retracted:
        for wiki in [
            NodeWikiPage.load(x)
            for x in node.wiki_pages_current.values()
        ]:
            elastic_document['wikis'][wiki.page_name] = wiki.raw_text(node)

    return elastic_document
def get_wiki(self, check_permissions=True):
    pk = self.kwargs[self.wiki_lookup_url_kwarg]
    wiki = NodeWikiPage.load(pk)
    if not wiki:
        raise NotFound

    if wiki.is_deleted:
        raise Gone

    # only show current wiki versions
    if not wiki.is_current:
        raise NotFound

    if check_permissions:
        # May raise a permission denied
        self.check_object_permissions(self.request, wiki)
    return wiki
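
# A minimal sketch (assumptions, not the actual OSF API code) of how a DRF detail
# view might consume get_wiki() above: get_object() delegates to the mixin method,
# which raises NotFound/Gone and checks object permissions before the serializer
# ever sees the page. WikiMixin and NodeWikiSerializer are hypothetical names here.
from rest_framework import generics

class WikiDetail(WikiMixin, generics.RetrieveAPIView):  # WikiMixin assumed to provide get_wiki()
    serializer_class = NodeWikiSerializer  # hypothetical serializer

    def get_object(self):
        return self.get_wiki()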
def test_wiki_pages_with_invalid_nodes_are_removed_after_cloning(self):
    project = ProjectFactory(creator=self.user, is_public=True)
    wiki = NodeWikiFactory(node=project)
    fork = project.fork_node(auth=Auth(self.user))
    fork.wiki_pages_versions = project.wiki_pages_versions
    fork.wiki_pages_current = project.wiki_pages_current
    fork.save()

    # Remove original node - wiki.node no longer points to an existing project
    Node.remove_one(project._id)

    # clone wiki page
    main()
    fork.reload()

    cloned_wiki_id = fork.wiki_pages_versions[wiki.page_name][0]
    cloned_wiki = NodeWikiPage.load(cloned_wiki_id)
    assert_equal(cloned_wiki.node._id, fork._id)

    # move original wiki page to unmigratedwikipages collection
    assert_false(db.nodewikipage.find_one({'_id': wiki._id}))
    assert_true(db.unmigratedwikipages.find_one({'_id': wiki._id}))
def update_node(node, index=None):
    index = index or INDEX
    from website.addons.wiki.model import NodeWikiPage

    category = get_doctype_from_node(node)

    if category == 'project':
        elastic_document_id = node._id
        parent_id = None
    else:
        try:
            elastic_document_id = node._id
            parent_id = node.parent_id
        except IndexError:
            # Skip orphaned components
            return

    if node.is_deleted or not node.is_public or node.archiving:
        delete_doc(elastic_document_id, node)
    else:
        try:
            normalized_title = six.u(node.title)
        except TypeError:
            normalized_title = node.title
        normalized_title = unicodedata.normalize('NFKD', normalized_title).encode('ascii', 'ignore')

        elastic_document = {
            'id': elastic_document_id,
            'contributors': [
                {
                    'fullname': x.fullname,
                    'url': x.profile_url if x.is_active else None
                }
                for x in node.visible_contributors
                if x is not None
            ],
            'title': node.title,
            'normalized_title': normalized_title,
            'category': category,
            'public': node.is_public,
            'tags': [tag._id for tag in node.tags if tag],
            'description': node.description,
            'url': node.url,
            'is_registration': node.is_registration,
            'is_pending_registration': node.is_pending_registration,
            'is_retracted': node.is_retracted,
            'is_pending_retraction': node.is_pending_retraction,
            'embargo_end_date': node.embargo_end_date.strftime("%A, %b. %d, %Y") if node.embargo_end_date else False,
            'is_pending_embargo': node.is_pending_embargo,
            'registered_date': node.registered_date,
            'wikis': {},
            'parent_id': parent_id,
            'date_created': node.date_created,
            'boost': int(not node.is_registration) + 1,  # This is for making registered projects less relevant
        }
        if not node.is_retracted:
            for wiki in [
                NodeWikiPage.load(x)
                for x in node.wiki_pages_current.values()
            ]:
                elastic_document['wikis'][wiki.page_name] = wiki.raw_text(node)

        es.index(index=index, doc_type=category, id=elastic_document_id, body=elastic_document, refresh=True)