def update_preprint(preprint, index=None, bulk=False, async_update=False):
    from addons.osfstorage.models import OsfStorageFile
    index = index or INDEX
    for file_ in paginated(OsfStorageFile, Q(
            target_content_type=ContentType.objects.get_for_model(type(preprint)),
            target_object_id=preprint.id)):
        update_file(file_, index=index)

    is_qa_preprint = bool(
        set(settings.DO_NOT_INDEX_LIST['tags']).intersection(
            preprint.tags.all().values_list('name', flat=True))
    ) or any(
        substring in preprint.title
        for substring in settings.DO_NOT_INDEX_LIST['titles'])
    if not preprint.verified_publishable or preprint.is_spam or (
            preprint.spam_status == SpamStatus.FLAGGED
            and settings.SPAM_FLAGGED_REMOVE_FROM_SEARCH) or is_qa_preprint:
        delete_doc(preprint._id, preprint, category='preprint', index=index)
    else:
        category = 'preprint'
        elastic_document = serialize_preprint(preprint, category)
        if bulk:
            return elastic_document
        else:
            client().index(index=index, doc_type=category, id=preprint._id,
                           body=elastic_document, refresh=True)

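# A minimal, self-contained sketch of the do-not-index check used above.
# The list values and the helper name `looks_like_qa` are illustrative
# assumptions, not the actual contents of settings.DO_NOT_INDEX_LIST.
QA_DO_NOT_INDEX_LIST = {
    'tags': ['qatest', 'qa test'],  # hypothetical QA tag names
    'titles': ['Bulk stress'],      # hypothetical QA title substrings
}

def looks_like_qa(tag_names, title):
    # A document is treated as QA content when it carries any QA tag or its
    # title contains any QA substring -- the same two-part test as
    # is_qa_preprint above.
    has_qa_tag = bool(set(QA_DO_NOT_INDEX_LIST['tags']).intersection(tag_names))
    has_qa_title = any(substring in title for substring in QA_DO_NOT_INDEX_LIST['titles'])
    return has_qa_tag or has_qa_title

assert looks_like_qa(['qatest', 'biology'], 'My preprint')
assert looks_like_qa([], 'Bulk stress test 42')
assert not looks_like_qa(['biology'], 'My preprint')
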
def get_events(self, date):
    """Get the email domains of all users confirmed during the 24-hour
    period starting at midnight on the given date.
    """
    super(UserDomainEvents, self).get_events(date)

    # Convert the date to a UTC datetime at midnight for the queries below
    date = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC)
    logger.info('Gathering user domains between {} and {}'.format(
        date, (date + timedelta(days=1)).isoformat()))

    user_query = (Q(date_confirmed__lt=date + timedelta(days=1)) &
                  Q(date_confirmed__gte=date) &
                  Q(username__isnull=False))
    users = paginated(OSFUser, query=user_query)
    user_domain_events = []
    for user in users:
        user_date = user.date_confirmed.replace(tzinfo=pytz.UTC)
        event = {
            'keen': {'timestamp': user_date.isoformat()},
            'date': user_date.isoformat(),
            'domain': user.username.split('@')[-1],
        }
        user_domain_events.append(event)

    logger.info('User domains collected. {} users and their email domains.'.format(
        len(user_domain_events)))
    return user_domain_events

def get_events(self, date):
    """Get the email domains of all users confirmed during the 24-hour
    period starting at midnight on the given date.
    """
    super(UserDomainEvents, self).get_events(date)

    # Convert the date to a UTC datetime at midnight for the queries below
    date = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC)
    logger.info('Gathering user domains between {} and {}'.format(
        date, (date + timedelta(1)).isoformat()))

    user_query = (Q('date_confirmed', 'lt', date + timedelta(1)) &
                  Q('date_confirmed', 'gte', date) &
                  Q('username', 'ne', None))
    users = paginated(OSFUser, query=user_query)
    user_domain_events = []
    for user in users:
        user_date = user.date_confirmed.replace(tzinfo=pytz.UTC)
        event = {
            'keen': {'timestamp': user_date.isoformat()},
            'date': user_date.isoformat(),
            'domain': user.username.split('@')[-1],
        }
        user_domain_events.append(event)

    logger.info('User domains collected. {} users and their email domains.'.format(
        len(user_domain_events)))
    return user_domain_events

def get_events(self, date):
    """Get all node logs for the 24-hour period starting at midnight on the
    given date.
    """
    super(NodeLogEvents, self).get_events(date)

    # Convert the date to a UTC datetime at midnight for the queries below
    date = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC)
    logger.info('Gathering node logs between {} and {}'.format(
        date, (date + timedelta(1)).isoformat()))

    node_log_query = Q('date', 'lt', date + timedelta(1)) & Q('date', 'gte', date)
    node_logs = paginated(NodeLog, query=node_log_query)

    node_log_events = []
    for node_log in node_logs:
        log_date = node_log.date.replace(tzinfo=pytz.UTC)
        event = {
            'keen': {'timestamp': log_date.isoformat()},
            'date': log_date.isoformat(),
            'action': node_log.action,
        }
        if node_log.user:
            event.update({'user_id': node_log.user._id})
        node_log_events.append(event)

    logger.info('NodeLogs counted. {} NodeLogs.'.format(len(node_log_events)))
    return node_log_events

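# A runnable sketch of the midnight-to-midnight window the collectors above
# query against. It uses only the standard library; the real code uses
# pytz.UTC, which is interchangeable with timezone.utc for this purpose.
from datetime import date, datetime, timedelta, timezone

def utc_day_window(day):
    # Midnight UTC at the start of `day`, and midnight 24 hours later.
    start = datetime(day.year, day.month, day.day, tzinfo=timezone.utc)
    return start, start + timedelta(days=1)

start, end = utc_day_window(date(2018, 1, 15))
assert end - start == timedelta(days=1)
# Queries filter with `gte` the start and `lt` the end, so an event at
# exactly midnight lands in exactly one daily window.
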
def update_node(node, index=None, bulk=False, async_update=False):
    from addons.osfstorage.models import OsfStorageFile
    index = index or INDEX
    for file_ in paginated(OsfStorageFile, Q(
            target_content_type=ContentType.objects.get_for_model(type(node)),
            target_object_id=node.id)):
        update_file(file_, index=index)

    is_qa_node = bool(
        set(settings.DO_NOT_INDEX_LIST['tags']).intersection(
            node.tags.all().values_list('name', flat=True))
    ) or any(
        substring in node.title
        for substring in settings.DO_NOT_INDEX_LIST['titles'])
    if node.is_deleted or not node.is_public or node.archiving or node.is_spam or (
            node.spam_status == SpamStatus.FLAGGED
            and settings.SPAM_FLAGGED_REMOVE_FROM_SEARCH
    ) or node.is_quickfiles or is_qa_node:
        delete_doc(node._id, node, index=index)
    else:
        category = get_doctype_from_node(node)
        elastic_document = serialize_node(node, category)
        if bulk:
            return elastic_document
        else:
            client().index(index=index, doc_type=category, id=node._id,
                           body=elastic_document, refresh=True)

def get_events(self, date=None):
    super(AddonSnapshot, self).get_events(date)

    counts = []
    addons_available = {addon.short_name: addon for addon in ADDONS_AVAILABLE}

    for short_name, addon in addons_available.items():
        node_settings_model = addon.models.get('nodesettings')
        has_external_account = hasattr(node_settings_model, 'external_account')

        connected_count = 0
        deleted_count = 0
        disconnected_count = 0
        if node_settings_model:
            for node_settings in paginated(node_settings_model):
                if node_settings.owner and not node_settings.owner.all_tags.filter(
                        name='old_node_collection', system=True).exists():
                    connected_count += 1
            deleted_count = node_settings_model.objects.filter(deleted=True).count()
            if has_external_account:
                disconnected_count = node_settings_model.objects.filter(
                    external_account__isnull=True, deleted=False).count()
            else:
                for nsm in node_settings_model.objects.filter(deleted=False):
                    if nsm.configured and not nsm.complete:
                        disconnected_count += 1

        total = connected_count + deleted_count + disconnected_count
        usage_counts = get_enabled_authorized_linked(
            addon.models.get('usersettings'), has_external_account,
            addon.short_name)

        counts.append({
            'provider': {'name': short_name},
            'users': usage_counts,
            'nodes': {
                'total': total,
                'connected': connected_count,
                'deleted': deleted_count,
                'disconnected': disconnected_count,
            },
        })

        logger.info(
            '{} counted. Users with a linked node: {}, Total connected nodes: {}.'.format(
                addon.short_name, usage_counts['linked'], total))
    return counts

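# The shape of one element of the `counts` payload built above, with made-up
# numbers for illustration. By construction, nodes.total is always the sum of
# the three node states.
example_count = {
    'provider': {'name': 'github'},
    'users': {'enabled': 100, 'authorized': 60, 'linked': 40},
    'nodes': {'total': 75, 'connected': 50, 'deleted': 20, 'disconnected': 5},
}
assert example_count['nodes']['total'] == sum(
    example_count['nodes'][key] for key in ('connected', 'deleted', 'disconnected'))
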
def get_events(self, date):
    super(UserSummary, self).get_events(date)

    # Convert to a datetime at midnight for queries and the timestamp
    timestamp_datetime = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC)
    query_datetime = timestamp_datetime + timedelta(1)

    active_user_query = (
        Q('is_registered', 'eq', True) &
        Q('password', 'ne', None) &
        Q('merged_by', 'eq', None) &
        Q('date_disabled', 'eq', None) &
        Q('date_confirmed', 'ne', None) &
        Q('date_confirmed', 'lt', query_datetime)
    )

    active_users = 0
    depth_users = 0
    profile_edited = 0
    user_pages = paginated(OSFUser, query=active_user_query)
    for user in user_pages:
        active_users += 1
        log_count = count_user_logs(user)
        if log_count >= LOG_THRESHOLD:
            depth_users += 1
        if user.social or user.schools or user.jobs:
            profile_edited += 1

    counts = {
        'keen': {
            'timestamp': timestamp_datetime.isoformat()
        },
        'status': {
            'active': active_users,
            'depth': depth_users,
            'unconfirmed': OSFUser.find(
                Q('date_registered', 'lt', query_datetime) &
                Q('date_confirmed', 'eq', None)).count(),
            'deactivated': OSFUser.find(
                Q('date_disabled', 'ne', None) &
                Q('date_disabled', 'lt', query_datetime)).count(),
            'merged': OSFUser.find(
                Q('date_registered', 'lt', query_datetime) &
                Q('merged_by', 'ne', None)).count(),
            'profile_edited': profile_edited,
        }
    }
    logger.info(
        'Users counted. Active: {}, Depth: {}, Unconfirmed: {}, Deactivated: {}, Merged: {}, Profile Edited: {}'.format(
            counts['status']['active'],
            counts['status']['depth'],
            counts['status']['unconfirmed'],
            counts['status']['deactivated'],
            counts['status']['merged'],
            counts['status']['profile_edited']))
    return [counts]

def main(dry=True):
    count = 0
    for node in paginated(AbstractNode, increment=1000):
        true_root = node.get_root()
        if not node.root or node.root.id != true_root.id:
            count += 1
            logger.info('Setting root for node {} to {}'.format(node._id, true_root._id))
            if not dry:
                AbstractNode.objects.filter(id=node.id).update(root=true_root)
    logger.info('Finished migrating {} nodes'.format(count))

def get_enabled_authorized_linked(user_settings_list, has_external_account, short_name):
    """Count the users who have at least one node at each stage of addon setup.

    :param user_settings_list: list of user settings for a particular addon
    :param has_external_account: whether the addon uses external accounts;
        determines how node settings are loaded
    :param short_name: short name of the addon, used to load the correct node settings
    :return: dict with the number of users that have at least one project at each stage
    """
    from addons.forward.models import NodeSettings as ForwardNodeSettings

    num_enabled = 0  # users with 1+ addon account connected
    num_authorized = 0  # users with 1+ addon account connected to 1+ node
    num_linked = 0  # users with 1+ addon account connected to 1+ node and configured

    # osfstorage and wiki don't have user_settings, so always assume they're
    # enabled, authorized, and linked
    if short_name == 'osfstorage' or short_name == 'wiki':
        num_enabled = num_authorized = num_linked = OSFUser.objects.filter(
            is_registered=True,
            password__isnull=False,
            merged_by__isnull=True,
            date_disabled__isnull=True,
            date_confirmed__isnull=False).count()
    elif short_name == 'forward':
        num_enabled = num_authorized = ForwardNodeSettings.objects.count()
        num_linked = ForwardNodeSettings.objects.filter(url__isnull=False).count()
    else:
        for user_settings in paginated(user_settings_list):
            node_settings_list = []
            if has_external_account:
                if user_settings.has_auth:
                    num_enabled += 1
                    node_settings_list = [
                        AbstractNode.load(guid).get_addon(short_name)
                        for guid in user_settings.oauth_grants.keys()
                    ]
            else:
                num_enabled += 1
                node_settings_list = [
                    AbstractNode.load(guid).get_addon(short_name)
                    for guid in user_settings.nodes_authorized
                ]
            if any(ns.has_auth for ns in node_settings_list if ns):
                num_authorized += 1
                if any(ns.complete and ns.configured for ns in node_settings_list if ns):
                    num_linked += 1
    return {
        'enabled': num_enabled,
        'authorized': num_authorized,
        'linked': num_linked,
    }

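# A self-contained sketch of the enabled -> authorized -> linked funnel above,
# using stand-in objects instead of real addon settings. `StubNodeSettings`
# and the sample data are hypothetical.
class StubNodeSettings(object):
    def __init__(self, has_auth, complete, configured):
        self.has_auth = has_auth
        self.complete = complete
        self.configured = configured

# One entry per enabled user: the node settings reachable from that user.
per_user_node_settings = [
    [StubNodeSettings(True, True, True)],   # authorized and linked
    [StubNodeSettings(True, False, True)],  # authorized, not linked
    [],                                     # enabled only
]
num_enabled = len(per_user_node_settings)
num_authorized = 0
num_linked = 0
for node_settings_list in per_user_node_settings:
    if any(ns.has_auth for ns in node_settings_list if ns):
        num_authorized += 1
        if any(ns.complete and ns.configured for ns in node_settings_list if ns):
            num_linked += 1
assert (num_enabled, num_authorized, num_linked) == (3, 2, 1)
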
def migrate_users(index):
    logger.info('Migrating users to index: {}'.format(index))
    n_migr = 0
    n_iter = 0
    users = paginated(OSFUser, query=None, each=True)
    for user in users:
        if user.is_active:
            search.update_user(user, index=index)
            n_migr += 1
        n_iter += 1

    logger.info('Users iterated: {0}\nUsers migrated: {1}'.format(n_iter, n_migr))

def remove_search_index(dry_run=True):
    tag_query = Q()
    title_query = Q()
    for tag in DO_NOT_INDEX_LIST['tags']:
        tag_query |= Q(tags__name=tag)

    for title in DO_NOT_INDEX_LIST['titles']:
        title_query |= Q(title__contains=title)

    increment = 20
    nodes = paginated(AbstractNode, query=Q(is_public=True) & (tag_query | title_query),
                      increment=increment, each=True)
    if dry_run:
        logger.warning('Dry run mode.')
        for node in nodes:
            logger.info('Removing {} with title \'{}\' from search index and SHARE.'.format(
                node._id, node.title))
    else:
        for node in nodes:
            update_node(node, bulk=False, async_update=True)
            update_node_share(node)

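# Typical usage sketch: run once in dry-run mode to log which nodes would be
# removed, then run for real. Purely illustrative; the original module wires
# this up through its own script entry point.
remove_search_index(dry_run=True)   # log candidates only
remove_search_index(dry_run=False)  # actually remove from search and SHARE
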
def migrate_nodes(index, query=None):
    logger.info('Migrating nodes to index: {}'.format(index))
    node_query = Q(is_public=True, is_deleted=False)
    if query:
        node_query = query & node_query
    total = AbstractNode.objects.filter(node_query).count()
    increment = 100
    total_pages = (total + increment - 1) // increment  # ceiling division
    pages = paginated(AbstractNode, query=node_query, increment=increment,
                      each=False, include=['contributor__user__guids'])
    for page_number, page in enumerate(pages):
        logger.info('Updating page {} / {}'.format(page_number + 1, total_pages))
        AbstractNode.bulk_update_search(page, index=index)

    logger.info('Nodes migrated: {}'.format(total))

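# The page count above uses ceiling division so the logged "page X / Y" total
# is exact; a quick check of the arithmetic:
def ceil_div(total, increment):
    return (total + increment - 1) // increment

assert ceil_div(0, 100) == 0
assert ceil_div(1, 100) == 1
assert ceil_div(100, 100) == 1
assert ceil_div(101, 100) == 2
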
def get_events(self, date):
    super(UserSummary, self).get_events(date)

    # Convert to a datetime at midnight for queries and the timestamp
    timestamp_datetime = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC)
    query_datetime = timestamp_datetime + timedelta(days=1)

    active_user_query = (
        Q(is_registered=True) &
        Q(password__isnull=False) &
        Q(merged_by__isnull=True) &
        Q(date_disabled__isnull=True) &
        Q(date_confirmed__isnull=False) &
        Q(date_confirmed__lt=query_datetime)
    )

    active_users = 0
    depth_users = 0
    profile_edited = 0
    user_pages = paginated(OSFUser, query=active_user_query)
    for user in user_pages:
        active_users += 1
        log_count = count_user_logs(user)
        if log_count >= LOG_THRESHOLD:
            depth_users += 1
        if user.social or user.schools or user.jobs:
            profile_edited += 1

    new_users = OSFUser.objects.filter(
        is_active=True,
        date_confirmed__gte=timestamp_datetime,
        date_confirmed__lt=query_datetime)

    counts = {
        'keen': {
            'timestamp': timestamp_datetime.isoformat()
        },
        'status': {
            'active': active_users,
            'depth': depth_users,
            'new_users_daily': new_users.count(),
            'new_users_with_institution_daily': new_users.filter(
                affiliated_institutions__isnull=False).count(),
            'unconfirmed': OSFUser.objects.filter(
                date_registered__lt=query_datetime,
                date_confirmed__isnull=True).count(),
            'deactivated': OSFUser.objects.filter(
                date_disabled__isnull=False,
                date_disabled__lt=query_datetime).count(),
            'merged': OSFUser.objects.filter(
                date_registered__lt=query_datetime,
                merged_by__isnull=False).count(),
            'profile_edited': profile_edited,
        }
    }
    try:
        # This reads from Keen, so it can fail when the Keen read API is down
        # while writes are still allowed
        counts['status']['stickiness'] = self.calculate_stickiness(
            timestamp_datetime, query_datetime)
    except requests.exceptions.ConnectionError:
        sentry.log_message(
            'Unable to read from Keen. stickiness metric not collected for date {}'.format(
                timestamp_datetime.isoformat()))

    logger.info(
        'Users counted. Active: {}, Depth: {}, Unconfirmed: {}, Deactivated: {}, Merged: {}, Profile Edited: {}'.format(
            counts['status']['active'],
            counts['status']['depth'],
            counts['status']['unconfirmed'],
            counts['status']['deactivated'],
            counts['status']['merged'],
            counts['status']['profile_edited']))
    return [counts]

        'boost': int(not node.is_registration) + 1,  # This is for making registered projects less relevant
        'extra_search_terms': clean_splitters(node.title),
        'preprint_url': node.preprint_url,
    }
    if not node.is_retracted:
        for wiki in node.get_wiki_pages_latest():
            # '.' is not allowed in field names in ES2
            elastic_document['wikis'][wiki.wiki_page.page_name.replace('.', ' ')] = wiki.raw_text(node)

    return elastic_document

@requires_search
def update_node(node, index=None, bulk=False, async_update=False):
    from addons.osfstorage.models import OsfStorageFile
    index = index or INDEX
    for file_ in paginated(OsfStorageFile, Q(node=node)):
        update_file(file_, index=index)

    is_qa_node = bool(
        set(settings.DO_NOT_INDEX_LIST['tags']).intersection(
            node.tags.all().values_list('name', flat=True))
    ) or any(
        substring in node.title
        for substring in settings.DO_NOT_INDEX_LIST['titles'])
    if node.is_deleted or not node.is_public or node.archiving or (
            node.is_spammy and settings.SPAM_FLAGGED_REMOVE_FROM_SEARCH
    ) or node.is_quickfiles or is_qa_node:
        delete_doc(node._id, node, index=index)
    else:
        category = get_doctype_from_node(node)
        elastic_document = serialize_node(node, category)
        if bulk:
            return elastic_document
        else:
            client().index(index=index, doc_type=category, id=node._id,
                           body=elastic_document, refresh=True)

def bulk_update_nodes(serialize, nodes, index=None):
    """Updates the list of input projects

        'boost': int(not node.is_registration) + 1,  # This is for making registered projects less relevant
        'extra_search_terms': clean_splitters(node.title),
        'preprint_url': node.preprint_url,
    }
    if not node.is_retracted:
        for wiki in node.get_wiki_pages_latest():
            # '.' is not allowed in field names in ES2
            elastic_document['wikis'][wiki.wiki_page.page_name.replace('.', ' ')] = wiki.raw_text(node)

    return elastic_document

@requires_search
def update_node(node, index=None, bulk=False, async_update=False):
    from addons.osfstorage.models import OsfStorageFile
    index = index or INDEX
    for file_ in paginated(OsfStorageFile, Q(
            target_content_type=ContentType.objects.get_for_model(type(node)),
            target_object_id=node.id)):
        update_file(file_, index=index)

    is_qa_node = bool(
        set(settings.DO_NOT_INDEX_LIST['tags']).intersection(
            node.tags.all().values_list('name', flat=True))
    ) or any(
        substring in node.title
        for substring in settings.DO_NOT_INDEX_LIST['titles'])
    if node.is_deleted or not node.is_public or node.archiving or (
            node.is_spammy and settings.SPAM_FLAGGED_REMOVE_FROM_SEARCH
    ) or node.is_quickfiles or is_qa_node:
        delete_doc(node._id, node, index=index)
    else:
        category = get_doctype_from_node(node)
        elastic_document = serialize_node(node, category)
        if bulk:
            return elastic_document
        else:
            client().index(index=index, doc_type=category, id=node._id,
                           body=elastic_document, refresh=True)

def bulk_update_nodes(serialize, nodes, index=None):
    """Updates the list of input projects

    if not node.is_retracted:
        for wiki in WikiPage.objects.get_wiki_pages_latest(node):
            # '.' is not allowed in field names in ES2
            elastic_document['wikis'][wiki.wiki_page.page_name.replace('.', ' ')] = wiki.raw_text(node)

    return elastic_document

@requires_search
def update_node(node, index=None, bulk=False, async_update=False):
    from addons.osfstorage.models import OsfStorageFile
    index = index or INDEX
    for file_ in paginated(OsfStorageFile, Q(
            target_content_type=ContentType.objects.get_for_model(type(node)),
            target_object_id=node.id)):
        update_file(file_, index=index)

    is_qa_node = bool(
        set(settings.DO_NOT_INDEX_LIST['tags']).intersection(
            node.tags.all().values_list('name', flat=True))
    ) or any(
        substring in node.title
        for substring in settings.DO_NOT_INDEX_LIST['titles'])
    if node.is_deleted or not node.is_public or node.archiving or (
            node.is_spammy and settings.SPAM_FLAGGED_REMOVE_FROM_SEARCH
    ) or node.is_quickfiles or is_qa_node:
        delete_doc(node._id, node, index=index)
    else:
        category = get_doctype_from_node(node)

    }
    if not node.is_retracted:
        for wiki in NodeWikiPage.objects.filter(
                guids___id__in=node.wiki_pages_current.values()):
            # '.' is not allowed in field names in ES2
            elastic_document['wikis'][wiki.page_name.replace('.', ' ')] = wiki.raw_text(node)

    return elastic_document

@requires_search
def update_node(node, index=None, bulk=False, async_update=False):
    from addons.osfstorage.models import OsfStorageFile
    index = index or INDEX
    for file_ in paginated(OsfStorageFile, Q(node=node)):
        update_file(file_, index=index)

    if node.is_deleted or not node.is_public or node.archiving or (
            node.is_spammy and settings.SPAM_FLAGGED_REMOVE_FROM_SEARCH) or node.is_quickfiles:
        delete_doc(node._id, node, index=index)
    else:
        category = get_doctype_from_node(node)
        elastic_document = serialize_node(node, category)
        if bulk:
            return elastic_document
        else:
            client().index(index=index, doc_type=category, id=node._id,

        'boost': int(not node.is_registration) + 1,  # This is for making registered projects less relevant
        'extra_search_terms': clean_splitters(node.title),
        'preprint_url': node.preprint_url,
    }
    if not node.is_retracted:
        for wiki in NodeWikiPage.objects.filter(
                guids___id__in=node.wiki_pages_current.values()):
            # '.' is not allowed in field names in ES2
            elastic_document['wikis'][wiki.page_name.replace('.', ' ')] = wiki.raw_text(node)

    return elastic_document

@requires_search
def update_node(node, index=None, bulk=False, async_update=False):
    from addons.osfstorage.models import OsfStorageFile
    index = index or INDEX
    for file_ in paginated(OsfStorageFile, Q('node', 'eq', node)):
        update_file(file_, index=index)

    if node.is_deleted or not node.is_public or node.archiving or (
            node.is_spammy and settings.SPAM_FLAGGED_REMOVE_FROM_SEARCH):
        delete_doc(node._id, node, index=index)
    else:
        category = get_doctype_from_node(node)
        elastic_document = serialize_node(node, category)
        if bulk:
            return elastic_document
        else:
            client().index(index=index, doc_type=category, id=node._id,
                           body=elastic_document, refresh=True)

def bulk_update_nodes(serialize, nodes, index=None):
    """Updates the list of input projects
