Beispiel #1
0
def activity():
    """Collect the node listings returned for the public activity view.

    Nothing is ranked here; every listing is read from a links node whose id
    is taken from ``settings``.
    New and Noteworthy projects are set manually or through
    `scripts/populate_new_and_noteworthy_projects.py`.
    Popular projects and registrations are generated by
    `scripts/populate_popular_projects_and_registrations.py`.

    An ``AttributeError`` raised while reading a listing (presumably because
    the links node could not be loaded) yields an empty list for that listing.

    :return: dict with the keys ``new_and_noteworthy_projects``,
        ``recent_public_registrations``, ``popular_public_projects`` and
        ``popular_public_registrations``.
    """
    # New and Noteworthy projects: resolve each pointer to the node it targets
    try:
        noteworthy_links = Node.load(settings.NEW_AND_NOTEWORTHY_LINKS_NODE)
        noteworthy = [link.node for link in noteworthy_links.nodes_pointer]
    except AttributeError:
        noteworthy = []

    # Popular projects: the pointers themselves are handed back
    try:
        popular_projects_links = Node.load(settings.POPULAR_LINKS_NODE)
        popular_projects = popular_projects_links.nodes_pointer
    except AttributeError:
        popular_projects = []

    # Popular registrations: same treatment as popular projects
    try:
        popular_registrations_links = Node.load(settings.POPULAR_LINKS_REGISTRATIONS)
        popular_registrations = popular_registrations_links.nodes_pointer
    except AttributeError:
        popular_registrations = []

    return dict(
        new_and_noteworthy_projects=noteworthy,
        recent_public_registrations=utils.recent_public_registrations(),
        popular_public_projects=popular_projects,
        popular_public_registrations=popular_registrations,
    )
Beispiel #2
0
def activity():
    """Generate analytics for most popular public projects and registrations.
    Called by `scripts/update_populate_projects_and_registrations`

    When a public Keen read key is configured, the ``node_visits`` entries
    from ``get_keen_activity()`` are ranked by ``result`` (highest first) and
    walked until both listings hold ``settings.MAX_POPULAR_PROJECTS`` nodes.
    Only public, non-deleted nodes are listed; registrations must also not be
    retracted.

    :return: dict with the keys ``new_and_noteworthy_projects``,
        ``recent_public_registrations``, ``popular_public_projects`` and
        ``popular_public_registrations``.
    """
    limit = settings.MAX_POPULAR_PROJECTS
    popular_projects, popular_registrations = [], []

    if settings.KEEN['public']['read_key']:
        visits = get_keen_activity()['node_visits']
        # Stable sort: entries with equal counts keep their incoming order.
        ranked = sorted(visits, key=lambda entry: entry['result'], reverse=True)
        ranked_ids = [entry['node.id'] for entry in ranked]

        for node_id in ranked_ids:
            node = Node.load(node_id)
            if node is None:
                continue

            if node.is_public and not node.is_deleted:
                # Pick the listing this node belongs to, if any.
                if not node.is_registration:
                    bucket = popular_projects
                elif not node.is_retracted:
                    bucket = popular_registrations
                else:
                    bucket = None
                if bucket is not None and len(bucket) < limit:
                    bucket.append(node)

            # Stop as soon as both listings are full.
            projects_full = len(popular_projects) >= limit
            if projects_full and len(popular_registrations) >= limit:
                break

    # New and Noteworthy projects are updated manually
    links_query = Q('_id', 'eq', settings.NEW_AND_NOTEWORTHY_LINKS_NODE)
    links_node = Node.find_one(links_query)
    noteworthy = [link.node for link in links_node.nodes_pointer]

    return dict(
        new_and_noteworthy_projects=noteworthy,
        recent_public_registrations=recent_public_registrations(),
        popular_public_projects=popular_projects,
        popular_public_registrations=popular_registrations,
    )
Beispiel #3
0
def activity():
    """Generate analytics for most popular public projects and registrations.
    Called by `scripts/update_populate_projects_and_registrations`

    With a public Keen read key configured, the ``node_visits`` entries from
    ``get_keen_activity()`` are ordered by ``result`` (descending) and loaded
    one by one until each listing contains ``settings.MAX_POPULAR_PROJECTS``
    nodes. A node is listed only if it is public and not deleted; a
    registration must additionally not be retracted.

    :return: dict with the keys ``new_and_noteworthy_projects``,
        ``recent_public_registrations``, ``popular_public_projects`` and
        ``popular_public_registrations``.
    """
    cap = settings.MAX_POPULAR_PROJECTS
    top_projects, top_registrations = [], []

    if settings.KEEN['public']['read_key']:
        visit_rows = get_keen_activity()['node_visits']
        # sorted() is stable, so rows with equal counts stay in input order.
        by_views = sorted(visit_rows, key=lambda row: row['result'], reverse=True)
        ordered_ids = [row['node.id'] for row in by_views]

        for nid in ordered_ids:
            candidate = Node.load(nid)
            if candidate is None:
                continue

            if candidate.is_public and not candidate.is_deleted:
                # Projects and registrations are collected separately.
                if not candidate.is_registration:
                    target = top_projects
                elif not candidate.is_retracted:
                    target = top_registrations
                else:
                    target = None
                if target is not None and len(target) < cap:
                    target.append(candidate)

            # Nothing left to collect once both listings are full.
            if len(top_projects) >= cap and len(top_registrations) >= cap:
                break

    # New and Noteworthy projects are updated manually
    noteworthy_node = Node.find_one(
        Q('_id', 'eq', settings.NEW_AND_NOTEWORTHY_LINKS_NODE)
    )
    noteworthy_projects = [ptr.node for ptr in noteworthy_node.nodes_pointer]

    return dict(
        new_and_noteworthy_projects=noteworthy_projects,
        recent_public_registrations=recent_public_registrations(),
        popular_public_projects=top_projects,
        popular_public_registrations=top_registrations,
    )
Beispiel #4
0
def recent_public_registrations(n=10):
    """Yield up to ``n`` public top-level registrations, newest first.

    Candidates are matched by ``CONTENT_NODE_QUERY`` plus ``parent_node`` being
    ``None``, ``is_public`` and ``is_registration``, sorted by descending
    ``registered_date``. Retracted registrations and those pending embargo are
    skipped and do not count towards ``n``.

    :param n: number of registrations to yield before stopping.
    """
    query = (
        CONTENT_NODE_QUERY
        & Q('parent_node', 'eq', None)
        & Q('is_public', 'eq', True)
        & Q('is_registration', 'eq', True)
    )
    remaining = n
    for registration in Node.find(query).sort('-registered_date'):
        if not remaining:
            break
        # These are calculated properties, so they are filtered here rather
        # than in the query.
        if not (registration.is_retracted or registration.is_pending_embargo):
            remaining -= 1
            yield registration
Beispiel #5
0
def activity():
    """Read the pre-generated node listings for the public activity view.

    Each listing comes from a links node identified by a ``settings`` value;
    no ranking is done here.
    New and Noteworthy projects are set manually or through
    `scripts/populate_new_and_noteworthy_projects.py`.
    Popular projects and registrations are generated by
    `scripts/populate_popular_projects_and_registrations.py`.

    If reading a listing raises ``AttributeError`` (presumably when the links
    node cannot be loaded), that listing is returned as an empty list.

    :return: dict with the keys ``new_and_noteworthy_projects``,
        ``recent_public_registrations``, ``popular_public_projects`` and
        ``popular_public_registrations``.
    """
    # New and Noteworthy projects: pointers are resolved to their nodes
    try:
        noteworthy_source = Node.load(settings.NEW_AND_NOTEWORTHY_LINKS_NODE)
        noteworthy_nodes = [ptr.node for ptr in noteworthy_source.nodes_pointer]
    except AttributeError:
        noteworthy_nodes = []

    # Popular projects: returned as pointers, not resolved
    try:
        projects_source = Node.load(settings.POPULAR_LINKS_NODE)
        popular_project_pointers = projects_source.nodes_pointer
    except AttributeError:
        popular_project_pointers = []

    # Popular registrations: returned as pointers, not resolved
    try:
        registrations_source = Node.load(settings.POPULAR_LINKS_REGISTRATIONS)
        popular_registration_pointers = registrations_source.nodes_pointer
    except AttributeError:
        popular_registration_pointers = []

    return dict(
        new_and_noteworthy_projects=noteworthy_nodes,
        recent_public_registrations=utils.recent_public_registrations(),
        popular_public_projects=popular_project_pointers,
        popular_public_registrations=popular_registration_pointers,
    )
Beispiel #6
0
def recent_public_registrations(n=10):
    """Generate at most ``n`` public top-level registrations, newest first.

    The query combines ``CONTENT_NODE_QUERY`` with ``parent_node`` being
    ``None``, ``is_public`` and ``is_registration``, ordered by descending
    ``registered_date``. Registrations that are retracted or pending embargo
    are passed over without consuming any of the ``n`` slots.

    :param n: number of registrations to yield before stopping.
    """
    criteria = (
        CONTENT_NODE_QUERY &
        Q('parent_node', 'eq', None) &
        Q('is_public', 'eq', True) &
        Q('is_registration', 'eq', True)
    )
    newest_first = Node.find(criteria).sort('-registered_date')

    slots_left = n
    for candidate in newest_first:
        if not slots_left:
            break
        # Calculated properties cannot be part of the query, so filter here.
        if not (candidate.is_retracted or candidate.is_pending_embargo):
            slots_left -= 1
            yield candidate
Beispiel #7
0
def migrate():
    """Reduce every target user's bookmark collections to a single one.

    For each user returned by ``get_targets()`` the user's non-deleted
    bookmark collections are loaded. The collection whose ``.nodes`` is
    non-empty is kept (falling back to the first one found); every other
    collection is flagged ``is_deleted`` and saved.

    Only non-deleted collections are considered, matching the filter
    ``get_targets()`` applies. Without it, a collection that was already
    deleted could be chosen as the one to keep while the live ones are
    deleted, e.g. when the migration is re-run.

    :raises Exception: if a user has more than one bookmark collection with
        a non-empty ``.nodes``.
    """
    targets = get_targets()
    total = len(targets)
    for i, user in enumerate(targets):
        logger.info('({}/{}) Preparing to migrate User {}'.format(i + 1, total, user._id))
        # Materialise once: the result is inspected several times below.
        bookmarks = list(Node.find(
            Q('is_bookmark_collection', 'eq', True) &
            Q('is_deleted', 'eq', False) &
            Q('contributors', 'eq', user._id)
        ))
        bookmarks_with_nodes = [n for n in bookmarks if n.nodes]
        if len(bookmarks_with_nodes) > 1:
            raise Exception('Expected no users to have more than one bookmark with .nodes, {} violated'.format(user._id))
        # Prefer the collection that actually holds nodes.
        bookmark_to_keep = bookmarks_with_nodes[0] if bookmarks_with_nodes else bookmarks[0]
        logger.info('Marking Node {} as primary Bookmark Collection for User {}, preparing to delete others'.format(bookmark_to_keep._id, user._id))
        for n in bookmarks:
            if n._id != bookmark_to_keep._id:
                n.is_deleted = True
                n.save()
        logger.info('Successfully migrated User {}'.format(user._id))
    logger.info('Successfully migrated {} users'.format(total))
Beispiel #8
0
def _provision_node(node_id):
    """Create a Piwik site for a node and give its contributors view access.

    Posts a ``SitesManager.addSite`` request to ``settings.PIWIK_HOST``,
    stores the returned site id on the node document as ``piwik_site_id``,
    then calls ``_change_view_access`` for the node's contributors (plus
    ``'anonymous'`` when the node is public).

    :param node_id: id passed to ``Node.load``.
    :raises PiwikException: if the response body is not JSON or does not
        contain a ``'value'`` entry (e.g. an error payload).
    """
    from website.project import Node
    node = Node.load(node_id)
    response = requests.post(
        settings.PIWIK_HOST,
        data={
            'module': 'API',
            'token_auth': settings.PIWIK_ADMIN_TOKEN,
            'format': 'json',
            'method': 'SitesManager.addSite',
            'siteName': 'Node: ' + node._id,
            'urls': [
                settings.CANONICAL_DOMAIN + node.url,
                settings.SHORT_DOMAIN + node.url,
            ],
        }
    )

    try:
        piwik_site_id = json.loads(response.content)['value']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not JSON. KeyError/TypeError: valid JSON that
        # carries no 'value' entry, which is also a failed site creation.
        raise PiwikException('Piwik site creation failed for ' + node._id)

    # Use pymongo so that we can save a single field without overwriting node
    db.node.update({'_id': node._id}, {'$set': {'piwik_site_id': piwik_site_id}})

    # contributors lists might be empty, due to a bug.
    if node.contributors:
        # contributors lists might contain `None` due to bug, hence `if user`
        users = ['osf.' + user._id for user in node.contributors if user]
        if node.is_public:
            users.append('anonymous')
        _change_view_access(users, node, 'view')
Beispiel #9
0
def _provision_node(node_id):
    """Create a Piwik site for a node and give its contributors view access.

    Posts a ``SitesManager.addSite`` request to ``settings.PIWIK_HOST``,
    stores the returned site id on the node document as ``piwik_site_id``,
    then calls ``_change_view_access`` for the node's contributors (plus
    ``'anonymous'`` when the node is public).

    :param node_id: id passed to ``Node.load``.
    :raises PiwikException: if the response body is not JSON or does not
        contain a ``'value'`` entry (e.g. an error payload).
    """
    from website.project import Node
    node = Node.load(node_id)
    response = requests.post(
        settings.PIWIK_HOST,
        data={
            'module': 'API',
            'token_auth': settings.PIWIK_ADMIN_TOKEN,
            'format': 'json',
            'method': 'SitesManager.addSite',
            'siteName': 'Node: ' + node._id,
            'urls': [
                settings.CANONICAL_DOMAIN + node.url,
                settings.SHORT_DOMAIN + node.url,
            ],
        }
    )

    try:
        piwik_site_id = json.loads(response.content)['value']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not JSON. KeyError/TypeError: valid JSON that
        # carries no 'value' entry, which is also a failed site creation.
        raise PiwikException('Piwik site creation failed for ' + node._id)

    # Use pymongo so that we can save a single field without overwriting node
    db.node.update({'_id': node._id}, {'$set': {'piwik_site_id': piwik_site_id}})

    # contributors lists might be empty, due to a bug.
    if node.contributors:
        # contributors lists might contain `None` due to bug, hence `if user`
        users = ['osf.' + user._id for user in node.contributors if user]
        if node.is_public:
            users.append('anonymous')
        _change_view_access(users, node, 'view')
Beispiel #10
0
def activity():
    """Build the listings for the public activity view from Piwik data.

    When ``settings.PIWIK_HOST`` is set, the values of the 'Project ID'
    custom variable for the week containing the date one week ago are walked
    in order, collecting up to ten public, non-deleted projects and up to ten
    public, non-deleted registrations. ``hits`` maps every reported project
    id to its ``actions`` and ``visits`` counts. If the report contains no
    'Project ID' variable, the popular listings and ``hits`` stay empty.

    :return: dict with the keys ``recent_public_projects``,
        ``recent_public_registrations``, ``popular_public_projects``,
        ``popular_public_registrations`` and ``hits``.
    """
    popular_public_projects = []
    popular_public_registrations = []
    hits = {}
    max_to_display = 10

    # get the date for exactly one week ago
    target_date = datetime.date.today() - datetime.timedelta(weeks=1)

    if settings.PIWIK_HOST:
        client = PiwikClient(
            url=settings.PIWIK_HOST,
            auth_token=settings.PIWIK_ADMIN_TOKEN,
            site_id=settings.PIWIK_SITE_ID,
            period='week',
            date=target_date.strftime('%Y-%m-%d'),
        )

        # The variable may be missing from the report; treat that as "no
        # data" instead of failing with an IndexError.
        project_id_variable = next(
            (x for x in client.custom_variables if x.label == 'Project ID'),
            None
        )
        if project_id_variable is not None:
            popular_project_ids = project_id_variable.values
        else:
            popular_project_ids = []

        for nid in popular_project_ids:
            node = Node.load(nid.value)
            if node is None:
                continue
            if node.is_public and not node.is_registration and not node.is_deleted:
                if len(popular_public_projects) < max_to_display:
                    popular_public_projects.append(node)
            elif node.is_public and node.is_registration and not node.is_deleted:
                if len(popular_public_registrations) < max_to_display:
                    popular_public_registrations.append(node)
            # Stop loading nodes once both listings are full
            if (len(popular_public_projects) >= max_to_display and
                    len(popular_public_registrations) >= max_to_display):
                break

        hits = {
            x.value: {
                'hits': x.actions,
                'visits': x.visits
            } for x in popular_project_ids
        }

    # Projects

    recent_query = (
        Q('category', 'eq', 'project') &
        Q('is_public', 'eq', True) &
        Q('is_deleted', 'eq', False)
    )

    recent_public_projects = Node.find(
        recent_query &
        Q('is_registration', 'eq', False)
    ).sort(
        '-date_created'
    ).limit(10)

    return {
        'recent_public_projects': recent_public_projects,
        'recent_public_registrations': recent_public_registrations(),
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations,
        'hits': hits,
    }
Beispiel #11
0
 def tearDown(self):
     """Run the parent class's teardown, then call ``Node.remove()``."""
     super(TestExplorePublicActivity, self).tearDown()
     # Called without arguments -- presumably clears every Node record the
     # tests created; confirm against the ODM's ``remove`` semantics.
     Node.remove()
Beispiel #12
0
 def tearDown(self):
     """Run the parent class's teardown, then call ``Node.remove()``."""
     super(TestExplorePublicActivity, self).tearDown()
     # Called without arguments -- presumably clears every Node record the
     # tests created; confirm against the ODM's ``remove`` semantics.
     Node.remove()
Beispiel #13
0
def get_targets():
    """Return the users that contribute to more than one live bookmark collection.

    Every user from ``User.find()`` is checked with a count query for nodes
    that are bookmark collections, are not deleted and list the user as a
    contributor. Users with a count above one are returned as a list.
    """
    logger.info('Acquiring targets...')
    targets = []
    for user in User.find():
        live_bookmarks = Node.find(
            Q('is_bookmark_collection', 'eq', True) &
            Q('is_deleted', 'eq', False) &
            Q('contributors', 'eq', user._id)
        )
        if live_bookmarks.count() > 1:
            targets.append(user)
    logger.info('Found {} target users.'.format(len(targets)))
    return targets
Beispiel #14
0
def activity():
    """Build the listings for the public activity view from Keen pageviews.

    When a public Keen read key is configured, pageview counts and unique
    visitor counts (``anon.id``) for the last seven days are fetched grouped
    by ``node.id``. The first ``max_popular_projects`` pageview groups are
    sorted by views (descending) and walked to collect up to ten public,
    non-deleted projects and up to ten public, non-deleted, non-retracted
    registrations. ``hits`` maps each considered node id to its ``hits``
    (views) and ``visits`` counts; a node without a visit group gets 0 visits.

    :return: dict with the keys ``new_and_noteworthy_projects``,
        ``recent_public_registrations``, ``popular_public_projects``,
        ``popular_public_registrations`` and ``hits``.
    """
    popular_public_projects = []
    popular_public_registrations = []
    hits = {}
    max_popular_projects = 20
    max_to_display = 10

    if settings.KEEN['public']['read_key']:
        client = KeenClient(
            project_id=settings.KEEN['public']['project_id'],
            read_key=settings.KEEN['public']['read_key'],
        )

        node_pageviews = client.count(
            event_collection='pageviews',
            timeframe='this_7_days',
            group_by='node.id',
            filters=[
                {
                    'property_name': 'node.id',
                    'operator': 'exists',
                    'property_value': True
                }
            ]
        )

        node_visits = client.count_unique(
            event_collection='pageviews',
            target_property='anon.id',
            timeframe='this_7_days',
            group_by='node.id',
            filters=[
                {
                    'property_name': 'node.id',
                    'operator': 'exists',
                    'property_value': True
                }
            ]
        )

        # Look visits up by node id. Every entry gets a 'visits' value, so the
        # `hits` dict below cannot fail on a node missing from the visit groups.
        visits_by_node = {
            visit['node.id']: visit['result'] for visit in node_visits
        }

        # NOTE(review): the groups are sliced before being sorted, so only the
        # first `max_popular_projects` groups as returned are ranked -- confirm
        # that this ordering is intended.
        node_data = [
            {
                'node': x['node.id'],
                'views': x['result'],
                'visits': visits_by_node.get(x['node.id'], 0),
            }
            for x in node_pageviews[0:max_popular_projects]
        ]

        node_data.sort(key=lambda datum: datum['views'], reverse=True)

        for nid in node_data:
            node = Node.load(nid['node'])
            if node is None:
                continue
            if node.is_public and not node.is_registration and not node.is_deleted:
                if len(popular_public_projects) < max_to_display:
                    popular_public_projects.append(node)
            elif node.is_public and node.is_registration and not node.is_deleted and not node.is_retracted:
                if len(popular_public_registrations) < max_to_display:
                    popular_public_registrations.append(node)
            # Stop loading nodes once both listings are full
            if (len(popular_public_projects) >= max_to_display and
                    len(popular_public_registrations) >= max_to_display):
                break

        hits = {
            datum['node']: {
                'hits': datum['views'],
                'visits': datum['visits']
            } for datum in node_data
        }

    # Projects

    new_and_noteworthy_pointers = Node.find_one(Q('_id', 'eq', settings.NEW_AND_NOTEWORTHY_LINKS_NODE)).nodes_pointer
    new_and_noteworthy_projects = [pointer.node for pointer in new_and_noteworthy_pointers]

    return {
        'new_and_noteworthy_projects': new_and_noteworthy_projects,
        'recent_public_registrations': recent_public_registrations(),
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations,
        'hits': hits,
    }
Beispiel #15
0
def activity():
    """Build the listings for the public activity view from Piwik data.

    When ``settings.PIWIK_HOST`` is set, the values of the 'Project ID'
    custom variable for the current week are walked in order, collecting up
    to ten public, non-deleted projects and up to ten public, non-deleted
    registrations. ``hits`` maps every reported project id to its ``actions``
    and ``visits`` counts. If the report contains no 'Project ID' variable,
    the popular listings and ``hits`` stay empty.

    The recent listings are the ten newest public, non-deleted nodes of
    category 'project' with a non-empty contributor list, split into
    non-registrations (by ``date_created``) and registrations (by
    ``registered_date``).

    :return: dict with the keys ``recent_public_projects``,
        ``recent_public_registrations``, ``popular_public_projects``,
        ``popular_public_registrations`` and ``hits``.
    """
    popular_public_projects = []
    popular_public_registrations = []
    hits = {}
    max_to_display = 10

    if settings.PIWIK_HOST:
        client = PiwikClient(
            url=settings.PIWIK_HOST,
            auth_token=settings.PIWIK_ADMIN_TOKEN,
            site_id=settings.PIWIK_SITE_ID,
            period='week',
            date='today',
        )

        # The variable may be missing from the report; treat that as "no
        # data" instead of failing with an IndexError.
        project_id_variable = next(
            (x for x in client.custom_variables if x.label == 'Project ID'),
            None
        )
        if project_id_variable is not None:
            popular_project_ids = project_id_variable.values
        else:
            popular_project_ids = []

        for nid in popular_project_ids:
            node = Node.load(nid.value)
            if node is None:
                continue
            if node.is_public and not node.is_registration and not node.is_deleted:
                if len(popular_public_projects) < max_to_display:
                    popular_public_projects.append(node)
            elif node.is_public and node.is_registration and not node.is_deleted:
                if len(popular_public_registrations) < max_to_display:
                    popular_public_registrations.append(node)
            # Stop loading nodes once both listings are full
            if (len(popular_public_projects) >= max_to_display and
                    len(popular_public_registrations) >= max_to_display):
                break

        hits = {
            x.value: {
                'hits': x.actions,
                'visits': x.visits
            } for x in popular_project_ids
        }

    # Projects

    recent_query = (
        Q('category', 'eq', 'project') &
        Q('is_public', 'eq', True) &
        Q('is_deleted', 'eq', False)
    )

    # Temporary bug fix: Skip projects with empty contributor lists
    # Todo: Fix underlying bug and remove this selector
    recent_query = recent_query & Q('contributors', 'ne', [])

    recent_public_projects = Node.find(
        recent_query &
        Q('is_registration', 'eq', False)
    ).sort(
        '-date_created'
    ).limit(10)

    # Registrations
    recent_public_registrations = Node.find(
        recent_query &
        Q('is_registration', 'eq', True)
    ).sort(
        '-registered_date'
    ).limit(10)

    return {
        'recent_public_projects': recent_public_projects,
        'recent_public_registrations': recent_public_registrations,
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations,
        'hits': hits,
    }
Beispiel #16
0
def activity():
    """Build the listings for the public activity view from Keen pageviews.

    When a public Keen read key is configured, pageview counts and unique
    visitor counts (``anon.id``) for the last seven days are fetched grouped
    by ``node.id``. The first ``max_popular_projects`` pageview groups are
    sorted by views (descending) and walked to collect up to ten public,
    non-deleted projects and up to ten public, non-deleted, non-retracted
    registrations. ``hits`` maps each considered node id to its ``hits``
    (views) and ``visits`` counts; a node without a visit group gets 0 visits.

    The recent projects listing is the ten newest public top-level
    non-registration nodes matching ``CONTENT_NODE_QUERY``.

    :return: dict with the keys ``recent_public_projects``,
        ``recent_public_registrations``, ``popular_public_projects``,
        ``popular_public_registrations`` and ``hits``.
    """
    popular_public_projects = []
    popular_public_registrations = []
    hits = {}
    max_popular_projects = 20
    max_to_display = 10

    if settings.KEEN['public']['read_key']:
        client = KeenClient(
            project_id=settings.KEEN['public']['project_id'],
            read_key=settings.KEEN['public']['read_key'],
        )

        node_pageviews = client.count(
            event_collection='pageviews',
            timeframe='this_7_days',
            group_by='node.id',
            filters=[
                {
                    'property_name': 'node.id',
                    'operator': 'exists',
                    'property_value': True
                }
            ]
        )

        node_visits = client.count_unique(
            event_collection='pageviews',
            target_property='anon.id',
            timeframe='this_7_days',
            group_by='node.id',
            filters=[
                {
                    'property_name': 'node.id',
                    'operator': 'exists',
                    'property_value': True
                }
            ]
        )

        # Look visits up by node id. Every entry gets a 'visits' value, so the
        # `hits` dict below cannot fail on a node missing from the visit groups.
        visits_by_node = {
            visit['node.id']: visit['result'] for visit in node_visits
        }

        # NOTE(review): the groups are sliced before being sorted, so only the
        # first `max_popular_projects` groups as returned are ranked -- confirm
        # that this ordering is intended.
        node_data = [
            {
                'node': x['node.id'],
                'views': x['result'],
                'visits': visits_by_node.get(x['node.id'], 0),
            }
            for x in node_pageviews[0:max_popular_projects]
        ]

        node_data.sort(key=lambda datum: datum['views'], reverse=True)

        for nid in node_data:
            node = Node.load(nid['node'])
            if node is None:
                continue
            if node.is_public and not node.is_registration and not node.is_deleted:
                if len(popular_public_projects) < max_to_display:
                    popular_public_projects.append(node)
            elif node.is_public and node.is_registration and not node.is_deleted and not node.is_retracted:
                if len(popular_public_registrations) < max_to_display:
                    popular_public_registrations.append(node)
            # Stop loading nodes once both listings are full
            if (len(popular_public_projects) >= max_to_display and
                    len(popular_public_registrations) >= max_to_display):
                break

        hits = {
            datum['node']: {
                'hits': datum['views'],
                'visits': datum['visits']
            } for datum in node_data
        }

    # Projects

    # Only show top-level projects (any category) in new and noteworthy lists
    # This means that public children of private nodes will be excluded
    recent_query = (
        Q('parent_node', 'eq', None) &
        Q('is_public', 'eq', True) &
        CONTENT_NODE_QUERY
    )

    recent_public_projects = Node.find(
        recent_query &
        Q('is_registration', 'eq', False)
    ).sort(
        '-date_created'
    ).limit(10)

    return {
        'recent_public_projects': recent_public_projects,
        'recent_public_registrations': recent_public_registrations(),
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations,
        'hits': hits,
    }