Esempio n. 1
0
    def get_events(self, date=None):
        """Build per-addon usage counts (users and nodes) for a snapshot.

        :param date: passed through to the base class; the counts gathered
            below are running totals, not per-day figures.
        :return: list of dicts, one per available addon, each with
            ``provider`` / ``users`` / ``nodes`` sections.
        """
        super(AddonSnapshot, self).get_events(date)

        from website.settings import ADDONS_AVAILABLE
        counts = []
        # Map each addon's short name to its config object.
        addons_available = {k: v for k, v in [(addon.short_name, addon) for addon in ADDONS_AVAILABLE]}

        for short_name, addon in addons_available.iteritems():
            user_settings_list = []
            node_settings_list = []
            # Materialize all user/node settings records for this addon
            # (paginated to keep per-query memory bounded).
            if addon.settings_models.get('user'):
                user_settings_list = [setting for setting in paginated(addon.settings_models['user'])]
            if addon.settings_models.get('node'):
                node_settings_list = [setting for setting in paginated(addon.settings_models['node'])]

            has_external_account = True
            # Check out the first element in node_settings_list to see if it has an external account to check for
            if node_settings_list:
                if AddonNodeSettingsBase in node_settings_list[0].__class__.__bases__:
                    has_external_account = False

            connected_count = 0
            # "Connected" = attached to a real project; bookmark collections
            # are excluded from the count.
            for node_settings in node_settings_list:
                if node_settings.owner and not node_settings.owner.is_bookmark_collection:
                    connected_count += 1
            deleted_count = addon.settings_models['node'].find(Q('deleted', 'eq', True)).count() if addon.settings_models.get('node') else 0

            # "Disconnected" = enabled but unusable: either the external
            # account was unlinked, or the addon was configured but never
            # completed, depending on the addon flavor.
            if has_external_account:
                disconnected_count = addon.settings_models['node'].find(Q('external_account', 'eq', None) & Q('deleted', 'ne', True)).count() if addon.settings_models.get('node') else 0
            else:
                disconnected_count = addon.settings_models['node'].find(Q('configured', 'eq', True) & Q('complete', 'eq', False) & Q('deleted', 'ne', True)).count() if addon.settings_models.get('node') else 0
            total = connected_count + deleted_count + disconnected_count
            usage_counts = get_enabled_authorized_linked(user_settings_list, has_external_account, addon.short_name)

            counts.append({
                'provider': {
                    'name': short_name
                },
                'users': usage_counts,
                'nodes': {
                    'total': total,
                    'connected': connected_count,
                    'deleted': deleted_count,
                    'disconnected': disconnected_count
                }
            })

            logger.info(
                '{} counted. Users with a linked node: {}, Total connected nodes: {}.'.format(
                    addon.short_name,
                    usage_counts['linked'],
                    total
                )
            )
        return counts
Esempio n. 2
0
    def get_events(self, date):
        """Collect one email-domain event per user confirmed during the
        24-hour window beginning at midnight (UTC) of ``date``.
        """
        super(UserDomainEvents, self).get_events(date)

        # Normalize to a tz-aware midnight so the range query is unambiguous.
        midnight = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC)

        logger.info('Gathering user domains between {} and {}'.format(
            midnight, (midnight + timedelta(1)).isoformat()
        ))
        window_query = (Q('date_confirmed', 'lt', midnight + timedelta(1)) &
                        Q('date_confirmed', 'gte', midnight) &
                        Q('username', 'ne', None))
        events = []
        for confirmed_user in paginated(OSFUser, query=window_query):
            confirmed_at = confirmed_user.date_confirmed.replace(tzinfo=pytz.UTC)
            events.append({
                'keen': {'timestamp': confirmed_at.isoformat()},
                'date': confirmed_at.isoformat(),
                'domain': confirmed_user.username.split('@')[-1]
            })

        logger.info('User domains collected. {} users and their email domains.'.format(len(events)))
        return events
Esempio n. 3
0
    def get_events(self, date):
        """Return one event per NodeLog recorded in the 24 hours starting
        at midnight (UTC) of ``date``.
        """
        super(NodeLogEvents, self).get_events(date)

        # Work with a tz-aware midnight so the range bounds are well defined.
        start = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC)
        end = start + timedelta(1)

        logger.info('Gathering node logs between {} and {}'.format(
            start, end.isoformat()
        ))

        in_window = Q('date', 'lt', end) & Q('date', 'gte', start)

        events = []
        for log in paginated(NodeLog, query=in_window):
            when = log.date.replace(tzinfo=pytz.UTC)
            record = {
                'keen': {'timestamp': when.isoformat()},
                'date': when.isoformat(),
                'action': log.action
            }
            # user is optional on a NodeLog; include its id only when set.
            if log.user:
                record['user_id'] = log.user._id
            events.append(record)

        logger.info('NodeLogs counted. {} NodeLogs.'.format(len(events)))
        return events
Esempio n. 4
0
    def get_events(self, date):
        """Summarize user account status counts as of the end of ``date``.

        :return: single-element list holding a keen-ready dict with counts
            of active, depth (heavily-logging), unconfirmed, deactivated,
            merged, and profile-edited users.
        """
        super(UserSummary, self).get_events(date)

        # Midnight (UTC) of the requested day; queries run up to the
        # following midnight so the whole day is covered.
        midnight = datetime(date.year, date.month,
                            date.day).replace(tzinfo=pytz.UTC)
        cutoff = midnight + timedelta(1)

        is_active = (Q('is_registered', 'eq', True)
                     & Q('password', 'ne', None)
                     & Q('merged_by', 'eq', None)
                     & Q('date_disabled', 'eq', None)
                     & Q('date_confirmed', 'ne', None)
                     & Q('date_confirmed', 'lt', cutoff))

        active = depth = edited = 0
        for user in paginated(User, query=is_active):
            active += 1
            if count_user_logs(user) >= LOG_THRESHOLD:
                depth += 1
            if user.social or user.schools or user.jobs:
                edited += 1

        unconfirmed = User.find(
            Q('date_registered', 'lt', cutoff)
            & Q('date_confirmed', 'eq', None)).count()
        deactivated = User.find(
            Q('date_disabled', 'ne', None)
            & Q('date_disabled', 'lt', cutoff)).count()
        merged = User.find(
            Q('date_registered', 'lt', cutoff)
            & Q('merged_by', 'ne', None)).count()

        counts = {
            'keen': {
                'timestamp': midnight.isoformat()
            },
            'status': {
                'active': active,
                'depth': depth,
                'unconfirmed': unconfirmed,
                'deactivated': deactivated,
                'merged': merged,
                'profile_edited': edited
            }
        }
        logger.info(
            'Users counted. Active: {}, Depth: {}, Unconfirmed: {}, Deactivated: {}, Merged: {}, Profile Edited: {}'
            .format(active, depth, unconfirmed, deactivated, merged, edited))
        return [counts]
Esempio n. 5
0
    def get_events(self, date):
        """Collect NodeLog events for the 24-hour window that starts at
        midnight (UTC) of ``date``.
        """
        super(NodeLogEvents, self).get_events(date)

        # Query bounds: [midnight, midnight + 1 day), timezone-aware.
        window_start = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC)
        window_end = window_start + timedelta(1)

        logger.info("Gathering node logs between {} and {}".format(window_start, window_end.isoformat()))

        in_window = Q("date", "lt", window_end) & Q("date", "gte", window_start)

        results = []
        for entry in paginated(NodeLog, query=in_window):
            stamped = entry.date.replace(tzinfo=pytz.UTC)
            payload = {
                "keen": {"timestamp": stamped.isoformat()},
                "date": stamped.isoformat(),
                "action": entry.action,
            }
            # Attach the acting user's id when the log has one.
            if entry.user:
                payload["user_id"] = entry.user._id
            results.append(payload)

        logger.info("NodeLogs counted. {} NodeLogs.".format(len(results)))
        return results
Esempio n. 6
0
    def get_events(self, date):
        """Emit one email-domain event per user confirmed during the 24
        hours beginning at midnight (UTC) of ``date``.
        """
        super(UserDomainEvents, self).get_events(date)

        # Collapse the input to a tz-aware midnight for querying.
        day_start = datetime(date.year, date.month,
                             date.day).replace(tzinfo=pytz.UTC)
        day_end = day_start + timedelta(1)

        logger.info('Gathering user domains between {} and {}'.format(
            day_start, day_end.isoformat()))
        confirmed_today = (Q('date_confirmed', 'lt', day_end)
                           & Q('date_confirmed', 'gte', day_start)
                           & Q('username', 'ne', None))
        domain_events = []
        for account in paginated(User, query=confirmed_today):
            stamp = account.date_confirmed.replace(tzinfo=pytz.UTC).isoformat()
            domain_events.append({
                'keen': {
                    'timestamp': stamp
                },
                'date': stamp,
                'domain': account.username.split('@')[-1]
            })

        logger.info(
            'User domains collected. {} users and their email domains.'.format(
                len(domain_events)))
        return domain_events
Esempio n. 7
0
def update_node(node, index=None, bulk=False):
    """Index (or delete) a node's document in Elasticsearch.

    Also re-indexes every OsfStorage file attached to the node. Deleted,
    private, and archiving nodes are removed from the index instead of
    being serialized.

    :param node: node whose search document should be refreshed
    :param index: Elasticsearch index name; defaults to the module-level INDEX
    :param bulk: when True, return the serialized document instead of
        indexing immediately (caller batches the writes)
    """
    index = index or INDEX
    # Function-scope import -- presumably to avoid a circular import at
    # module load time; TODO confirm.
    from website.addons.wiki.model import NodeWikiPage

    category = get_doctype_from_node(node)

    elastic_document_id = node._id
    parent_id = node.parent_id

    from website.files.models.osfstorage import OsfStorageFile

    # Keep the node's files in sync with the node itself.
    for file_ in paginated(OsfStorageFile, Q("node", "eq", node)):
        update_file(file_, index=index)

    if node.is_deleted or not node.is_public or node.archiving:
        # Non-searchable states: drop the document from the index.
        delete_doc(elastic_document_id, node, index=index)
    else:
        # Build an ASCII-folded title for normalized search matching;
        # six.u may raise TypeError depending on the title's type, in
        # which case the title is used as-is.
        try:
            normalized_title = six.u(node.title)
        except TypeError:
            normalized_title = node.title
        normalized_title = unicodedata.normalize("NFKD", normalized_title).encode("ascii", "ignore")

        elastic_document = {
            "id": elastic_document_id,
            "contributors": [
                {"fullname": x.fullname, "url": x.profile_url if x.is_active else None}
                for x in node.visible_contributors
                if x is not None
            ],
            "title": node.title,
            "normalized_title": normalized_title,
            "category": category,
            "public": node.is_public,
            "tags": [tag._id for tag in node.tags if tag],
            "description": node.description,
            "url": node.url,
            "is_registration": node.is_registration,
            "is_pending_registration": node.is_pending_registration,
            "is_retracted": node.is_retracted,
            "is_pending_retraction": node.is_pending_retraction,
            "embargo_end_date": node.embargo_end_date.strftime("%A, %b. %d, %Y") if node.embargo_end_date else False,
            "is_pending_embargo": node.is_pending_embargo,
            "registered_date": node.registered_date,
            "wikis": {},
            "parent_id": parent_id,
            "date_created": node.date_created,
            "license": serialize_node_license_record(node.license),
            "affiliated_institutions": [inst.name for inst in node.affiliated_institutions],
            "boost": int(not node.is_registration) + 1,  # This is for making registered projects less relevant
        }
        if not node.is_retracted:
            # Wiki text is searchable only for non-retracted nodes.
            for wiki in [NodeWikiPage.load(x) for x in node.wiki_pages_current.values()]:
                elastic_document["wikis"][wiki.page_name] = wiki.raw_text(node)

        if bulk:
            return elastic_document
        else:
            es.index(index=index, doc_type=category, id=elastic_document_id, body=elastic_document, refresh=True)
Esempio n. 8
0
def count_file_downloads():
    """Tally unique and total download counts across all OsfStorage files.

    :return: tuple ``(downloads_unique, downloads_total)`` summed over
        every file's download counter page.
    """
    unique_sum = 0
    total_sum = 0
    for stored_file in paginated(OsfStorageFile):
        counter_key = ':'.join(['download', stored_file.node._id, stored_file._id])
        uniq, tot = get_basic_counters(counter_key)
        unique_sum += uniq or 0
        total_sum += tot or 0
        clear_modm_cache()
    return unique_sum, total_sum
Esempio n. 9
0
def count_file_downloads():
    """Sum per-file download counters over every OsfStorageFile.

    :return: tuple ``(downloads_unique, downloads_total)``
    """
    downloads_unique = downloads_total = 0
    for record in paginated(OsfStorageFile):
        counter_page = ':'.join(['download', record.node._id, record._id])
        unique, total = get_basic_counters(counter_page)
        if unique:
            downloads_unique += unique
        if total:
            downloads_total += total
        clear_modm_cache()
    return downloads_unique, downloads_total
Esempio n. 10
0
def main(dry=True):
    """Repair ``root`` on any node where it disagrees with ``_root``.

    :param dry: when True (default), only log what would change; nothing
        is saved.
    """
    init_app(set_backends=True, routes=False)  # Sets the storage backends on all models
    fixed = 0
    for node in paginated(Node, increment=1000):
        if node.root and node.root._id == node._root._id:
            continue
        fixed += 1
        logger.info('Setting root for node {} to {}'.format(node._id, node._root._id))
        node.root = node._root._id
        if not dry:
            node.save()
    logger.info('Finished migrating {} nodes'.format(fixed))
Esempio n. 11
0
def migrate_users(index):
    """Re-index every active user into ``index``; log totals at the end."""
    logger.info('Migrating users to index: {}'.format(index))
    migrated = seen = 0
    for account in paginated(User, query=None, increment=1000, each=True):
        seen += 1
        if account.is_active:
            search.update_user(account, index=index)
            migrated += 1

    logger.info('Users iterated: {0}\nUsers migrated: {1}'.format(seen, migrated))
Esempio n. 12
0
def get_enabled_authorized_linked(user_settings_list, has_external_account,
                                  short_name):
    """ Gather the number of users who have at least one node in each of the stages for an addon

    :param user_settings_list: list of user_settings for a particular addon
    :param has_external_account: whether the addon links via an external
        account; determines how node settings are loaded
    :param short_name: short name of addon to get correct node_settings
    :return:  dict with number of users that have at least one project at each stage
    """
    from addons.forward.models import NodeSettings as ForwardNodeSettings

    num_enabled = 0  # of users w/ 1+ addon account connected
    num_authorized = 0  # of users w/ 1+ addon account connected to 1+ node
    num_linked = 0  # of users w/ 1+ addon account connected to 1+ node and configured

    # osfstorage and wiki don't have user_settings, so always assume they're enabled, authorized, linked
    if short_name == 'osfstorage' or short_name == 'wiki':
        num_enabled = num_authorized = num_linked = User.find(
            Q('is_registered', 'eq', True) & Q('password', 'ne', None)
            & Q('merged_by', 'eq', None) & Q('date_disabled', 'eq', None)
            & Q('date_confirmed', 'ne', None)).count()

    elif short_name == 'forward':
        num_enabled = num_authorized = ForwardNodeSettings.find().count()
        num_linked = ForwardNodeSettings.find(Q('url', 'ne', None)).count()

    else:
        for user_settings in paginated(user_settings_list):
            guids = []
            if has_external_account:
                if user_settings.has_auth:
                    num_enabled += 1
                    guids = user_settings.oauth_grants.keys()
            else:
                num_enabled += 1
                guids = user_settings.nodes_authorized
            # Bug fix: Node.load returns None for stale/unknown guids, which
            # previously raised AttributeError on .get_addon; skip those.
            node_settings_list = [
                node.get_addon(short_name)
                for node in (Node.load(guid) for guid in guids)
                if node is not None
            ]
            if any([ns.has_auth for ns in node_settings_list if ns]):
                num_authorized += 1
                if any([(ns.complete and ns.configured)
                        for ns in node_settings_list if ns]):
                    num_linked += 1
    return {
        'enabled': num_enabled,
        'authorized': num_authorized,
        'linked': num_linked
    }
Esempio n. 13
0
def migrate_users(index):
    """Push every active user into the given search index."""
    logger.info('Migrating users to index: {}'.format(index))
    totals = {'migrated': 0, 'seen': 0}
    for person in paginated(User, query=None, each=True):
        totals['seen'] += 1
        if person.is_active:
            search.update_user(person, index=index)
            totals['migrated'] += 1

    logger.info('Users iterated: {0}\nUsers migrated: {1}'.format(
        totals['seen'], totals['migrated']))
Esempio n. 14
0
def migrate_nodes(index):
    """Bulk re-index all public, non-deleted nodes into ``index``.

    Pages through the query in batches of 200, clearing the MODM cache
    after each batch to keep memory bounded.
    """
    logger.info('Migrating nodes to index: {}'.format(index))
    query = Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    total = Node.find(query).count()
    increment = 200
    # Bug fix: the old `(total // increment) + 1` over-counted by one page
    # whenever total was an exact multiple of the batch size; use ceiling
    # division so the "page X / Y" log line is accurate.
    total_pages = (total + increment - 1) // increment
    pages = paginated(Node, query=query, increment=increment, each=False)
    for page_number, page in enumerate(pages):
        logger.info('Updating page {} / {}'.format(page_number + 1, total_pages))
        Node.bulk_update_search(page, index=index)
        Node._clear_caches()

    logger.info('Nodes migrated: {}'.format(total))
Esempio n. 15
0
    def get_events(self, date=None):
        """Snapshot per-addon usage counts (users and nodes).

        :return: list with one entry per available addon, each holding the
            provider name, user-stage counts, and node connection counts.
        """
        super(AddonSnapshot, self).get_events(date)


        results = []
        by_short_name = dict((addon.short_name, addon) for addon in ADDONS_AVAILABLE)

        for short_name, addon in by_short_name.iteritems():

            node_settings_model = addon.models.get('nodesettings')
            # Addons whose node settings carry an external account are
            # counted via that field; others via configured/complete flags.
            has_external_account = hasattr(node_settings_model, 'external_account')

            connected = 0
            deleted = 0
            disconnected = 0
            if node_settings_model:
                # "Connected" excludes bookmark collections.
                for ns in paginated(node_settings_model):
                    if ns.owner and not ns.owner.is_bookmark_collection:
                        connected += 1
                deleted = node_settings_model.find(Q('deleted', 'eq', True)).count()
                if has_external_account:
                    disconnected = node_settings_model.find(Q('external_account', 'eq', None) & Q('deleted', 'ne', True)).count()
                else:
                    for candidate in node_settings_model.find(Q('deleted', 'ne', True)):
                        if candidate.configured and not candidate.complete:
                            disconnected += 1
            total = connected + deleted + disconnected
            usage_counts = get_enabled_authorized_linked(addon.models.get('usersettings'), has_external_account, addon.short_name)

            results.append({
                'provider': {
                    'name': short_name
                },
                'users': usage_counts,
                'nodes': {
                    'total': total,
                    'connected': connected,
                    'deleted': deleted,
                    'disconnected': disconnected
                }
            })

            logger.info(
                '{} counted. Users with a linked node: {}, Total connected nodes: {}.'.format(
                    addon.short_name,
                    usage_counts['linked'],
                    total
                )
            )
        return results
Esempio n. 16
0
def migrate_nodes(index):
    """Re-index all public, non-deleted nodes, in batches of 200."""
    logger.info('Migrating nodes to index: {}'.format(index))
    public_not_deleted = Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    total = Node.find(public_not_deleted).count()
    batch_size = 200
    page_count = (total // batch_size) + 1
    batches = paginated(Node, query=public_not_deleted, increment=batch_size, each=False)
    for page_index, batch in enumerate(batches, start=1):
        logger.info('Updating page {} / {}'.format(page_index, page_count))
        Node.bulk_update_search(batch, index=index)
        Node._clear_caches()

    logger.info('Nodes migrated: {}'.format(total))
Esempio n. 17
0
def migrate_nodes(index, query=None):
    """Re-index public, non-deleted nodes (optionally narrowed by ``query``)."""
    logger.info('Migrating nodes to index: {}'.format(index))
    base_query = Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    node_query = (query & base_query) if query else base_query
    total = Node.find(node_query).count()
    increment = 200
    total_pages = (total // increment) + 1
    # Pre-fetch contributor guids so serialization avoids per-node queries.
    pages = paginated(Node, query=node_query, increment=increment, each=False, include=['contributor__user__guids'])

    for idx, page in enumerate(pages, 1):
        logger.info('Updating page {} / {}'.format(idx, total_pages))
        Node.bulk_update_search(page, index=index)

    logger.info('Nodes migrated: {}'.format(total))
Esempio n. 18
0
def main():
    """Provision a Piwik site for every node that lacks one.

    Pass ``--dry`` on the command line to log without provisioning.
    """
    init_app(set_backends=True, routes=False)
    dry = '--dry' in sys.argv
    if not dry:
        scripts_utils.add_file_logger(logger, __file__)

    provisioned = 0
    for node in paginated(Node, Q('piwik_site_id', 'eq', None), increment=15):
        logger.info('Provisioning Piwik node for Node {}'.format(node._id))
        if not dry:
            piwik._provision_node(node._id)
            # Throttle to reduce load on Piwik
            time.sleep(1)
        provisioned += 1
    logger.info('Provisioned {} nodes'.format(provisioned))
Esempio n. 19
0
def get_enabled_authorized_linked(user_settings_list, has_external_account, short_name):
    """Count users with at least one node at each stage for an addon.

    :param user_settings_list: list of user_settings for a particular addon
    :param has_external_account: whether the addon links through an external
        account; determines how node settings are loaded
    :param short_name: short name of addon to get correct node_settings
    :return: dict with number of users that have at least one project at
        each stage (enabled / authorized / linked)
    """
    from addons.forward.models import NodeSettings as ForwardNodeSettings

    enabled = 0  # of users w/ 1+ addon account connected
    authorized = 0  # of users w/ 1+ addon account connected to 1+ node
    linked = 0  # of users w/ 1+ addon account connected to 1+ node and configured

    # osfstorage and wiki don't have user_settings, so always assume they're enabled, authorized, linked
    if short_name in ('osfstorage', 'wiki'):
        active_user_query = (
            Q('is_registered', 'eq', True) &
            Q('password', 'ne', None) &
            Q('merged_by', 'eq', None) &
            Q('date_disabled', 'eq', None) &
            Q('date_confirmed', 'ne', None)
        )
        enabled = authorized = linked = User.find(active_user_query).count()

    elif short_name == 'forward':
        enabled = authorized = ForwardNodeSettings.find().count()
        linked = ForwardNodeSettings.find(Q('url', 'ne', None)).count()

    else:
        for user_settings in paginated(user_settings_list):
            settings_for_nodes = []
            if has_external_account:
                if user_settings.has_auth:
                    enabled += 1
                    settings_for_nodes = [Node.load(guid).get_addon(short_name)
                                          for guid in user_settings.oauth_grants.keys()]
            else:
                enabled += 1
                settings_for_nodes = [Node.load(guid).get_addon(short_name)
                                      for guid in user_settings.nodes_authorized]
            present = [ns for ns in settings_for_nodes if ns]
            if any(ns.has_auth for ns in present):
                authorized += 1
                if any(ns.complete and ns.configured for ns in present):
                    linked += 1
    return {
        'enabled': enabled,
        'authorized': authorized,
        'linked': linked
    }
Esempio n. 20
0
def migrate_nodes(index, query=None):
    """Bulk re-index public, non-deleted nodes; ``query`` further narrows
    the set when given.
    """
    logger.info('Migrating nodes to index: {}'.format(index))
    node_query = Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    if query:
        node_query = query & node_query
    total = Node.find(node_query).count()
    batch = 200
    page_total = (total // batch) + 1
    # Pre-fetch contributor guids to cut per-node queries during indexing.
    page_iter = paginated(Node,
                          query=node_query,
                          increment=batch,
                          each=False,
                          include=['contributor__user__guids'])

    current = 0
    for page in page_iter:
        current += 1
        logger.info('Updating page {} / {}'.format(current, page_total))
        Node.bulk_update_search(page, index=index)

    logger.info('Nodes migrated: {}'.format(total))
Esempio n. 21
0
    def get_events(self, date):
        """Collect one event per NodeLog written in the 24-hour window
        that starts at midnight (UTC) of ``date``.
        """
        super(NodeLogEvents, self).get_events(date)

        # Anchor the window at a tz-aware midnight.
        window_start = datetime(date.year, date.month,
                                date.day).replace(tzinfo=pytz.UTC)
        window_end = window_start + timedelta(1)

        logger.info('Gathering node logs between {} and {}'.format(
            window_start, window_end.isoformat()))

        in_range = Q('date', 'lt', window_end) & Q('date', 'gte', window_start)

        collected = []
        for log_entry in paginated(NodeLog, query=in_range):
            occurred = log_entry.date.replace(tzinfo=pytz.UTC).isoformat()
            item = {
                'keen': {
                    'timestamp': occurred
                },
                'date': occurred,
                'action': log_entry.action
            }
            # The acting user is optional on a log entry.
            if log_entry.user:
                item['user_id'] = log_entry.user._id
            collected.append(item)

        logger.info('NodeLogs counted. {} NodeLogs.'.format(
            len(collected)))
        return collected
Esempio n. 22
0
    def get_events(self, date=None):
        """Snapshot per-addon usage counts (users and nodes).

        :param date: passed through to the base class; the counts below
            are running totals, not per-day figures.
        :return: list of dicts, one per available addon.
        """
        super(AddonSnapshot, self).get_events(date)

        from website.settings import ADDONS_AVAILABLE
        counts = []
        addons_available = {
            k: v
            for k, v in [(addon.short_name, addon)
                         for addon in ADDONS_AVAILABLE]
        }

        for short_name, addon in addons_available.iteritems():

            has_external_account = True

            # NOTE(review): settings_models['user'] looks like a class, so
            # .__class__.__bases__ inspects its metaclass's bases; the
            # per-instance re-check inside the loop below is what actually
            # flips the flag -- confirm whether this branch is still needed.
            if addon.settings_models.get('user'):
                if AddonNodeSettingsBase in addon.settings_models[
                        'user'].__class__.__bases__:
                    has_external_account = False

            connected_count = 0
            deleted_count = 0
            disconnected_count = 0
            node_settings_model = addon.settings_models.get('node')
            if node_settings_model:
                for node_settings in paginated(node_settings_model):
                    if AddonNodeSettingsBase in node_settings.__class__.__bases__:
                        has_external_account = False
                    if node_settings.owner and not node_settings.owner.is_bookmark_collection:
                        connected_count += 1
                deleted_count = addon.settings_models['node'].find(
                    Q('deleted', 'eq', True)).count(
                    ) if addon.settings_models.get('node') else 0
                if has_external_account:
                    disconnected_count = addon.settings_models['node'].find(
                        Q('external_account', 'eq', None)
                        & Q('deleted', 'ne', True)).count(
                        ) if addon.settings_models.get('node') else 0
                else:
                    disconnected_count = addon.settings_models['node'].find(
                        Q('configured', 'eq', True)
                        & Q('complete', 'eq', False)
                        & Q('deleted', 'ne', True)).count(
                        ) if addon.settings_models.get('node') else 0
            # Bug fix: `total` used to be assigned inside the
            # `if node_settings_model:` branch but read unconditionally
            # below, so an addon with no node settings model reported the
            # previous addon's total (or raised NameError on the first
            # iteration). Compute it unconditionally.
            total = connected_count + deleted_count + disconnected_count
            usage_counts = get_enabled_authorized_linked(
                addon.settings_models.get('user'), has_external_account,
                addon.short_name)

            counts.append({
                'provider': {
                    'name': short_name
                },
                'users': usage_counts,
                'nodes': {
                    'total': total,
                    'connected': connected_count,
                    'deleted': deleted_count,
                    'disconnected': disconnected_count
                }
            })

            logger.info(
                '{} counted. Users with a linked node: {}, Total connected nodes: {}.'
                .format(addon.short_name, usage_counts['linked'], total))
        return counts
Esempio n. 23
0
    }
    if not node.is_retracted:
        for wiki in [
            NodeWikiPage.load(x)
            for x in node.wiki_pages_current.values()
        ]:
            elastic_document['wikis'][wiki.page_name] = wiki.raw_text(node)

    return elastic_document

@requires_search
def update_node(node, index=None, bulk=False, async=False):
    """Refresh (or remove) a node's Elasticsearch document.

    Also re-indexes each OsfStorage file attached to the node. Deleted,
    private, archiving, or spam-flagged (when configured) nodes are
    removed from the index instead of serialized.

    :param node: node to (re)index
    :param index: target index; defaults to the module-level INDEX
    :param bulk: when True, return the serialized document for batch
        indexing instead of writing it immediately
    :param async: not referenced in this body -- NOTE(review): `async`
        became a reserved word in Python 3.7, so this signature is
        Python 2 only; confirm before porting
    """
    index = index or INDEX

    # Keep the node's files in sync with the node itself.
    from website.files.models.osfstorage import OsfStorageFile
    for file_ in paginated(OsfStorageFile, Q('node', 'eq', node)):
        update_file(file_.wrapped(), index=index)

    # Non-searchable states drop the document; otherwise serialize and index.
    if node.is_deleted or not node.is_public or node.archiving or (node.is_spammy and settings.SPAM_FLAGGED_REMOVE_FROM_SEARCH):
        delete_doc(node._id, node, index=index)
    else:
        category = get_doctype_from_node(node)
        elastic_document = serialize_node(node, category)
        if bulk:
            return elastic_document
        else:
            es.index(index=index, doc_type=category, id=node._id, body=elastic_document, refresh=True)

def bulk_update_nodes(serialize, nodes, index=None):
    """Updates the list of input projects
Esempio n. 24
0
def update_node(node, index=None, bulk=False):
    """Index or delete the Elasticsearch document for ``node``.

    OsfStorage files belonging to the node are re-indexed first.  Deleted,
    private, or archiving nodes have their search document removed;
    otherwise a full document (contributors, tags, wikis, license,
    affiliated institutions, ...) is built and either returned for bulk
    indexing (``bulk=True``) or sent to Elasticsearch immediately.
    """
    index = index or INDEX
    from website.addons.wiki.model import NodeWikiPage

    category = get_doctype_from_node(node)
    doc_id = node._id

    from website.files.models.osfstorage import OsfStorageFile
    for stored_file in paginated(OsfStorageFile, Q('node', 'eq', node)):
        update_file(stored_file, index=index)

    if node.is_deleted or not node.is_public or node.archiving:
        delete_doc(doc_id, node)
        return None

    # ASCII-fold the title so accented characters still match plain queries.
    try:
        unicode_title = six.u(node.title)
    except TypeError:
        # Title is already a unicode string.
        unicode_title = node.title
    normalized_title = unicodedata.normalize('NFKD', unicode_title).encode('ascii', 'ignore')

    contributors = [
        {
            'fullname': person.fullname,
            # Inactive users keep their name but lose the profile link.
            'url': person.profile_url if person.is_active else None,
        }
        for person in node.visible_contributors if person is not None
    ]

    elastic_document = {
        'id': doc_id,
        'contributors': contributors,
        'title': node.title,
        'normalized_title': normalized_title,
        'category': category,
        'public': node.is_public,
        'tags': [tag._id for tag in node.tags if tag],
        'description': node.description,
        'url': node.url,
        'is_registration': node.is_registration,
        'is_pending_registration': node.is_pending_registration,
        'is_retracted': node.is_retracted,
        'is_pending_retraction': node.is_pending_retraction,
        'embargo_end_date': node.embargo_end_date.strftime("%A, %b. %d, %Y") if node.embargo_end_date else False,
        'is_pending_embargo': node.is_pending_embargo,
        'registered_date': node.registered_date,
        'wikis': {},
        'parent_id': node.parent_id,
        'date_created': node.date_created,
        'license': serialize_node_license_record(node.license),
        'affiliated_institutions': [inst.name for inst in node.affiliated_institutions],
        # Registrations rank slightly lower than live projects.
        'boost': int(not node.is_registration) + 1,
    }
    if not node.is_retracted:
        # Wiki text is only indexed while the node is not retracted.
        for page_id in node.wiki_pages_current.values():
            wiki = NodeWikiPage.load(page_id)
            elastic_document['wikis'][wiki.page_name] = wiki.raw_text(node)

    if bulk:
        return elastic_document
    es.index(index=index, doc_type=category, id=doc_id, body=elastic_document, refresh=True)
Esempio n. 25
0
def update_node(node, index=None, bulk=False):
    """Refresh the search index entry for ``node``.

    Re-indexes the node's OsfStorage files, then either removes the node's
    document (deleted, private, or archiving nodes) or serializes it and
    either returns the document for bulk indexing (``bulk=True``) or
    indexes it right away.
    """
    index = index or INDEX
    from website.addons.wiki.model import NodeWikiPage

    category = get_doctype_from_node(node)
    elastic_document_id = node._id
    parent_id = node.parent_id

    from website.files.models.osfstorage import OsfStorageFile
    for attached_file in paginated(OsfStorageFile, Q('node', 'eq', node)):
        update_file(attached_file, index=index)

    hidden = node.is_deleted or not node.is_public or node.archiving
    if hidden:
        delete_doc(elastic_document_id, node)
        return None

    # Fold the title to plain ASCII so accented characters stay searchable.
    try:
        unicode_title = six.u(node.title)
    except TypeError:
        unicode_title = node.title
    normalized_title = unicodedata.normalize('NFKD', unicode_title).encode('ascii', 'ignore')

    def _contributor(user):
        # One entry per visible contributor; inactive users keep their
        # name but get no profile URL.
        return {
            'fullname': user.fullname,
            'url': user.profile_url if user.is_active else None,
        }

    elastic_document = {
        'id': elastic_document_id,
        'contributors': [_contributor(u) for u in node.visible_contributors if u is not None],
        'title': node.title,
        'normalized_title': normalized_title,
        'category': category,
        'public': node.is_public,
        'tags': [tag._id for tag in node.tags if tag],
        'description': node.description,
        'url': node.url,
        'is_registration': node.is_registration,
        'is_pending_registration': node.is_pending_registration,
        'is_retracted': node.is_retracted,
        'is_pending_retraction': node.is_pending_retraction,
        'embargo_end_date': node.embargo_end_date.strftime("%A, %b. %d, %Y") if node.embargo_end_date else False,
        'is_pending_embargo': node.is_pending_embargo,
        'registered_date': node.registered_date,
        'wikis': {},
        'parent_id': parent_id,
        'date_created': node.date_created,
        'license': serialize_node_license_record(node.license),
        'primary_institution': node.primary_institution.name if node.primary_institution else None,
        'boost': int(not node.is_registration) + 1,  # registrations rank slightly lower
    }
    if not node.is_retracted:
        # Skip wiki bodies for retracted registrations.
        wikis = elastic_document['wikis']
        for page_key in node.wiki_pages_current.values():
            page = NodeWikiPage.load(page_key)
            wikis[page.page_name] = page.raw_text(node)

    if bulk:
        return elastic_document
    es.index(index=index, doc_type=category, id=elastic_document_id, body=elastic_document, refresh=True)
Esempio n. 26
0
    def get_events(self, date):
        """Return a one-element list with the user-status counts for ``date``.

        Iterates every active user to count engagement depth (log count at
        or above LOG_THRESHOLD) and completed profiles, and runs aggregate
        queries for unconfirmed, deactivated, and merged accounts as of the
        end of the given day.
        """
        super(UserSummary, self).get_events(date)

        # Midnight (UTC) of the requested day is the keen timestamp; the
        # queries use the following midnight so the whole day is covered.
        midnight = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC)
        cutoff = midnight + timedelta(1)

        active_user_query = (
            Q('is_registered', 'eq', True) &
            Q('password', 'ne', None) &
            Q('merged_by', 'eq', None) &
            Q('date_disabled', 'eq', None) &
            Q('date_confirmed', 'ne', None) &
            Q('date_confirmed', 'lt', cutoff)
        )

        total_active = 0
        engaged = 0
        with_profile = 0
        for user in paginated(User, query=active_user_query):
            total_active += 1
            if count_user_logs(user) >= LOG_THRESHOLD:
                engaged += 1
            if user.social or user.schools or user.jobs:
                with_profile += 1

        counts = {
            'keen': {
                'timestamp': midnight.isoformat()
            },
            'status': {
                'active': total_active,
                'depth': engaged,
                'unconfirmed': User.find(
                    Q('date_registered', 'lt', cutoff) &
                    Q('date_confirmed', 'eq', None)
                ).count(),
                'deactivated': User.find(
                    Q('date_disabled', 'ne', None) &
                    Q('date_disabled', 'lt', cutoff)
                ).count(),
                'merged': User.find(
                    Q('date_registered', 'lt', cutoff) &
                    Q('merged_by', 'ne', None)
                ).count(),
                'profile_edited': with_profile
            }
        }
        logger.info(
            'Users counted. Active: {}, Depth: {}, Unconfirmed: {}, Deactivated: {}, Merged: {}, Profile Edited: {}'.format(
                counts['status']['active'],
                counts['status']['depth'],
                counts['status']['unconfirmed'],
                counts['status']['deactivated'],
                counts['status']['merged'],
                counts['status']['profile_edited']
            )
        )
        return [counts]
Esempio n. 27
0
    }
    if not node.is_retracted:
        for wiki in [
                NodeWikiPage.load(x) for x in node.wiki_pages_current.values()
        ]:
            elastic_document['wikis'][wiki.page_name] = wiki.raw_text(node)

    return elastic_document


@requires_search
def update_node(node, index=None, bulk=False, async=False):
    index = index or INDEX

    from website.files.models.osfstorage import OsfStorageFile
    for file_ in paginated(OsfStorageFile, Q('node', 'eq', node)):
        update_file(file_.wrapped(), index=index)

    if node.is_deleted or not node.is_public or node.archiving or (
            node.is_spammy and settings.SPAM_FLAGGED_REMOVE_FROM_SEARCH):
        delete_doc(node._id, node, index=index)
    else:
        category = get_doctype_from_node(node)
        elastic_document = serialize_node(node, category)
        if bulk:
            return elastic_document
        else:
            es.index(index=index,
                     doc_type=category,
                     id=node._id,
                     body=elastic_document,