Ejemplo n.º 1
0
    def from_analytical(self, project_identifier, forum_id):
        """ Return discussion forum row from the analytical database.

        :param project_identifier: identifier of the owning project
        :param forum_id: forum key within the project
        :returns: dict of forum fields, or None when no active row exists
        """
        # Parameterized query: let the driver quote the values instead of
        # interpolating them into the SQL string (SQL injection risk).
        query = """
        SELECT forum_key, discussion_name, author, moderators, subscribers,
               subject, description, project_key, project_identifier, project_name
        FROM discussion_dim
        WHERE project_identifier = %s AND forum_key = %s
        AND VALID_TO IS NULL"""

        row = []
        with analytical_query() as cursor:
            try:
                # safe_string/safe_int kept for whatever normalization they do
                cursor.execute(query, (safe_string(project_identifier), safe_int(forum_id)))
                row = cursor.fetchone()
            except Exception:
                # Best-effort read: log and report the miss as None below
                conf.log.exception(
                    "Failed reading discussion forum from analytical db. {project: %s, forum: %d}" %
                    (project_identifier, forum_id))

        if not row:
            return None

        project = {'forum_key': row[0],
                   'discussion_name': MySQLdb.escape_string(row[1]),
                   'author': MySQLdb.escape_string(row[2]),
                   'moderators': MySQLdb.escape_string(row[3]),
                   'subscribers': MySQLdb.escape_string(row[4]),
                   'subject': MySQLdb.escape_string(row[5]),
                   'description': MySQLdb.escape_string(row[6]),
                   'project_key': row[7],
                   'project_identifier': MySQLdb.escape_string(row[8]),
                   'project_name': MySQLdb.escape_string(row[9])
        }
        return project
Ejemplo n.º 2
0
    def from_analytical(self, identifier):
        """ Return project row from the analytical database.

        :param identifier: project identifier to look up
        :returns: dict of project fields, or None when no active row exists
        """
        # Parameterized query to avoid SQL injection via the identifier.
        query = """
        SELECT identifier, project_name, author, created,
               updated, published, project_key
        FROM project_dim
        WHERE identifier = %s AND VALID_TO IS NULL"""

        row = []
        with analytical_query() as cursor:
            try:
                cursor.execute(query, (identifier,))
                row = cursor.fetchone()
            except Exception:
                conf.log.exception("Getting project from analytical db failed. project identifier : %s" %
                                        identifier)

        if not row:
            return None

        project = {'identifier': MySQLdb.escape_string(row[0]),
                   'project_name': MySQLdb.escape_string(row[1]),
                   'author': MySQLdb.escape_string(row[2]),
                   'created': row[3],
                   'updated': row[4],
                   'published': row[5],
                   'project_key': row[6]}
        return project
Ejemplo n.º 3
0
    def _init_sk_cache(self):
        """ Read all context surrogate keys from the database and populate
            the four (home/project, prefix/exact) path caches.
        """
        with analytical_query() as cursor:
            try:
                query = """SELECT context_sk, context, environment_type, path_info FROM context_dim"""
                cursor.execute(query)
                for row in cursor:
                    sk = row[0]
                    path = row[3]
                    is_home = row[2] == 'home'

                    # Rows whose path ends with '.*' are prefix matchers;
                    # store them with the '.*' suffix stripped.
                    if path.endswith('.*'):
                        cache = self.home_prefix_cache if is_home else self.project_prefix_cache
                        cache[path[:-2]] = sk
                    else:
                        cache = self.home_exact_cache if is_home else self.project_exact_cache
                        cache[path] = sk
            except Exception:
                conf.log.exception("Failed reading context sk")
Ejemplo n.º 4
0
    def from_analytical(self, username):
        """ Return user row from the analytical database.

        :param username: username to look up
        :returns: dict of user fields, or None when no active row exists
        """
        query = """
        SELECT username, mail, mobile, givenName,
               lastName, authentication, status, user_key
        FROM user_dim
        WHERE username = %s AND VALID_TO IS NULL
        """

        row = []
        with analytical_query() as cursor:
            try:
                # Pass parameters as a tuple, per DB-API 2.0
                cursor.execute(query, (username,))
                row = cursor.fetchone()
            except Exception:
                conf.log.exception(
                    "Failed reading user from analytical database. username : %s"
                    % username)

        if not row:
            return None

        user = {
            'username': row[0],
            'mail': row[1],
            'mobile': row[2],
            'givenName': row[3],
            'lastName': row[4],
            'authentication': row[5],
            'status': row[6],
            'user_key': row[7]
        }
        return user
Ejemplo n.º 5
0
    def from_analytical(self, identifier):
        """ Return project row from the analytical database.

        :param identifier: project identifier to look up
        :returns: dict of project fields, or None when no active row exists
        """
        # Parameterized query to avoid SQL injection via the identifier.
        query = """
        SELECT identifier, project_name, author, created,
               updated, published, project_key
        FROM project_dim
        WHERE identifier = %s AND VALID_TO IS NULL"""

        row = []
        with analytical_query() as cursor:
            try:
                cursor.execute(query, (identifier,))
                row = cursor.fetchone()
            except Exception:
                conf.log.exception(
                    "Getting project from analytical db failed. project identifier : %s"
                    % identifier)

        if not row:
            return None

        project = {
            'identifier': MySQLdb.escape_string(row[0]),
            'project_name': MySQLdb.escape_string(row[1]),
            'author': MySQLdb.escape_string(row[2]),
            'created': row[3],
            'updated': row[4],
            'published': row[5],
            'project_key': row[6]
        }
        return project
Ejemplo n.º 6
0
    def from_analytical(self, username):
        """ Return user row from the analytical database.

        :param username: username to look up
        :returns: dict of user fields, or None when no active row exists
        """
        query = """
        SELECT username, mail, mobile, givenName,
               lastName, authentication, status, user_key
        FROM user_dim
        WHERE username = %s AND VALID_TO IS NULL
        """

        row = []
        with analytical_query() as cursor:
            try:
                # Pass parameters as a tuple, per DB-API 2.0
                cursor.execute(query, (username,))
                row = cursor.fetchone()
            except Exception:
                conf.log.exception("Failed reading user from analytical database. username : %s" % username)

        if not row:
            return None

        user = {
            'username': row[0],
            'mail': row[1],
            'mobile': row[2],
            'givenName': row[3],
            'lastName': row[4],
            'authentication': row[5],
            'status': row[6],
            'user_key': row[7]
        }
        return user
Ejemplo n.º 7
0
    def _init_sk_cache(self):
        """ Read all context surrogate keys from the database and populate
            the four (home/project, prefix/exact) path caches.
        """
        with analytical_query() as cursor:
            try:
                query = """SELECT context_sk, context, environment_type, path_info FROM context_dim"""
                cursor.execute(query)
                for row in cursor:
                    sk = row[0]
                    path = row[3]
                    is_home = row[2] == 'home'

                    # Rows whose path ends with '.*' are prefix matchers;
                    # store them with the '.*' suffix stripped.
                    if path.endswith('.*'):
                        cache = self.home_prefix_cache if is_home else self.project_prefix_cache
                        cache[path[:-2]] = sk
                    else:
                        cache = self.home_exact_cache if is_home else self.project_exact_cache
                        cache[path] = sk
            except Exception:
                conf.log.exception("Failed reading context sk")
Ejemplo n.º 8
0
 def _read_discussion_surrogate_keys(self):
     """ Populate sk_cache with project_key keyed by
         cache_key(project_identifier, forum_key) for active rows.
     """
     query = "SELECT project_identifier, forum_key, project_key FROM discussion_dim WHERE VALID_TO IS NULL"
     with analytical_query() as cursor:
         try:
             cursor.execute(query)
             for row in cursor:
                 cache_key = self.cache_key(row[0], row[1])
                 self.sk_cache[cache_key] = row[2]
         except Exception:
             conf.log.exception("Failed reading discussion surrogate keys")
Ejemplo n.º 9
0
 def _read_discussion_surrogate_keys(self):
     """ Populate sk_cache with project_key keyed by
         cache_key(project_identifier, forum_key) for active rows.
     """
     query = "SELECT project_identifier, forum_key, project_key FROM discussion_dim WHERE VALID_TO IS NULL"
     with analytical_query() as cursor:
         try:
             cursor.execute(query)
             for row in cursor:
                 cache_key = self.cache_key(row[0], row[1])
                 self.sk_cache[cache_key] = row[2]
         except Exception:
             conf.log.exception("Failed reading discussion surrogate keys")
Ejemplo n.º 10
0
    def _read_project_surrogate_keys(self):
        """ Populate sk_cache with project_sk keyed by project identifier
            for all active (VALID_TO IS NULL) rows.
        """
        query = "SELECT identifier, project_sk FROM project_dim WHERE VALID_TO IS NULL"

        with analytical_query() as cursor:
            try:
                cursor.execute(query)
                for row in cursor:
                    self.sk_cache[row[0]] = row[1]
            except Exception:
                conf.log.exception("Failed reading project surrogate keys")
Ejemplo n.º 11
0
    def _read_project_surrogate_keys(self):
        """ Populate sk_cache with project_sk keyed by project identifier
            for all active (VALID_TO IS NULL) rows.
        """
        query = "SELECT identifier, project_sk FROM project_dim WHERE VALID_TO IS NULL"

        with analytical_query() as cursor:
            try:
                cursor.execute(query)
                for row in cursor:
                    self.sk_cache[row[0]] = row[1]
            except Exception:
                conf.log.exception("Failed reading project surrogate keys")
    def applied(self):
        """ Return True when exactly one 'Admin Project VCM' context row
            exists in context_dim, otherwise False.
        """
        sql = '''
                SELECT COUNT(*) AS count FROM %s.context_dim
                WHERE context = 'Admin Project VCM'
            ''' % conf.db_analytical_schema_name

        with analytical_query() as cursor:
            cursor.execute(sql)
            count_row = cursor.fetchone()

        return count_row[0] == 1
Ejemplo n.º 13
0
    def applied(self):
        # Return True when the permission-related context rows are present
        # in context_dim, otherwise False.
        # NOTE(review): the IN list names five contexts but the success check
        # is row[0] == 6 -- perhaps one context legitimately occurs twice, or
        # this is an off-by-one; confirm against the migration that inserts
        # these rows.

        with analytical_query() as cursor:
            cursor.execute('''
                SELECT COUNT(*) AS count FROM %s.context_dim
                WHERE (context IN ('Project permissions','Home permissions','Admin Users','Admin Groups', 'Group templates'))
            ''' % conf.db_analytical_schema_name)
            row = cursor.fetchone()
            if row[0] == 6:
                return True

        return False
Ejemplo n.º 14
0
 def _init_active_user_sks(self):
     """ Populate sk_cache with the surrogate keys of users that are
         active {username => user_sk}
     """
     query = "SELECT username, user_sk FROM user_dim WHERE VALID_TO IS NULL"
     with analytical_query() as cursor:
         try:
             cursor.execute(query)
             for row in cursor:
                 self.sk_cache[row[0]] = row[1]
         except Exception:
             conf.log.exception("Failed reading user surrogate keys with query '%s'" % query)
    def applied(self):
        """
        Check whether the configured actions already exist in event_dim.

        :returns: True if at least one action name is found, otherwise False
        """
        with analytical_query() as cursor:
            cursor.execute('SELECT action_name FROM %s.event_dim' % conf.db_analytical_schema_name)
            # Use a set for O(1) membership tests below
            action_names = {row[0] for row in cursor}

        # Equivalent to the original double negation
        # `not all(action not in ...)`: applied when ANY action is present.
        return any(action in action_names for action, crud, context in self.actions)
Ejemplo n.º 16
0
    def _read_data_management_plans(self):
        """ Read the current data management plan table into
            self.data_management_plans as TableDataManagementPlan objects.
        """
        self.data_management_plans = []
        query = ("SELECT `table`, buffer_partitions, partition_size, partition_count, archive_table, "
                 "VALID_FROM, VALID_TO FROM data_management")

        with analytical_query() as cursor:
            try:
                cursor.execute(query)
                for row in cursor.fetchall():
                    self.data_management_plans.append(TableDataManagementPlan(row))
            except Exception:
                # Best-effort: leave the (already reset) plan list empty on failure
                conf.log.exception("Failed reading data management plans")
Ejemplo n.º 17
0
 def _init_active_user_sks(self):
     """ Populate sk_cache with the surrogate keys of users that are
         active {username => user_sk}
     """
     query = "SELECT username, user_sk FROM user_dim WHERE VALID_TO IS NULL"
     with analytical_query() as cursor:
         try:
             cursor.execute(query)
             for row in cursor:
                 self.sk_cache[row[0]] = row[1]
         except Exception:
             conf.log.exception(
                 "Failed reading user surrogate keys with query '%s'" %
                 query)
Ejemplo n.º 18
0
    def _inapplicable_sk_row(self):
        """ Return the discussion_sk row of the special '<Inapplicable>'
            placeholder discussion, or a falsy value when missing/failed.
        """
        query = """
            SELECT discussion_sk FROM discussion_dim
            WHERE project_identifier = '<Inapplicable>'
              AND discussion_name = '<Inapplicable>'"""
        row = []
        with analytical_query() as cursor:
            try:
                cursor.execute(query)
                row = cursor.fetchone()
            except Exception:
                conf.log.exception("Getting inapplicable discussion sk failed")

        return row
Ejemplo n.º 19
0
    def _inapplicable_sk_row(self):
        """ Return the discussion_sk row of the special '<Inapplicable>'
            placeholder discussion, or a falsy value when missing/failed.
        """
        query = """
            SELECT discussion_sk FROM discussion_dim
            WHERE project_identifier = '<Inapplicable>'
              AND discussion_name = '<Inapplicable>'"""
        row = []
        with analytical_query() as cursor:
            try:
                cursor.execute(query)
                row = cursor.fetchone()
            except Exception:
                conf.log.exception("Getting inapplicable discussion sk failed")

        return row
    def applied(self):
        """
        Check whether the configured actions already exist in event_dim.

        :returns: True if at least one action name is found, otherwise False
        """
        with analytical_query() as cursor:
            cursor.execute('SELECT action_name FROM %s.event_dim' %
                           conf.db_analytical_schema_name)
            # Use a set for O(1) membership tests below
            action_names = {row[0] for row in cursor}

        # Equivalent to the original double negation
        # `not all(action not in ...)`: applied when ANY action is present.
        return any(action in action_names for action in self.actions)
Ejemplo n.º 21
0
    def _read_data_management_plans(self):
        """ Read the current data management plan table into
            self.data_management_plans as TableDataManagementPlan objects.
        """
        self.data_management_plans = []
        query = (
            "SELECT `table`, buffer_partitions, partition_size, partition_count, archive_table, "
            "VALID_FROM, VALID_TO FROM data_management")

        with analytical_query() as cursor:
            try:
                cursor.execute(query)
                for row in cursor.fetchall():
                    self.data_management_plans.append(
                        TableDataManagementPlan(row))
            except Exception:
                # Best-effort: leave the (already reset) plan list empty on failure
                conf.log.exception("Failed reading data management plans")
Ejemplo n.º 22
0
    def get_active_project_sk(self, identifier):
        """ Return the surrogate key of the active (VALID_TO IS NULL) project
            row for `identifier`, caching it in sk_cache. None when not found.
        """
        # Parameterized query to avoid SQL injection via the identifier.
        query = """
        SELECT project_sk
        FROM project_dim
        WHERE identifier = %s AND VALID_TO IS NULL"""

        row = []
        with analytical_query() as cursor:
            try:
                cursor.execute(query, (identifier,))
                row = cursor.fetchone()
            except Exception:
                conf.log.exception("Getting project sk failed. project identifier : %s" % identifier)

        if not row:
            return None

        self.sk_cache[identifier] = row[0]
        return row[0]
Ejemplo n.º 23
0
    def get_active_user_sk(self, username):
        """ Return the surrogate key of the active (VALID_TO IS NULL) user
            row for `username`, caching it in sk_cache. None when not found.
        """
        # Parameterized query: the original interpolated the username into a
        # quoted literal, which was injection-prone (and the literal had been
        # scrubbed, leaving a dangling `%` format with no placeholder).
        query = """
        SELECT user_sk
        FROM user_dim
        WHERE username = %s AND VALID_TO IS NULL"""

        row = []
        with analytical_query() as cursor:
            try:
                cursor.execute(query, (username,))
                row = cursor.fetchone()
            except Exception:
                conf.log.exception("Failed reading users surrogate key. %s" % username)

        if not row:
            return None

        self.sk_cache[username] = row[0]
        return row[0]
Ejemplo n.º 24
0
    def get_active_user_sk(self, username):
        """ Return the surrogate key of the active (VALID_TO IS NULL) user
            row for `username`, caching it in sk_cache. None when not found.
        """
        # Parameterized query: the original interpolated the username into a
        # quoted literal, which was injection-prone (and the literal had been
        # scrubbed, leaving a dangling `%` format with no placeholder).
        query = """
        SELECT user_sk
        FROM user_dim
        WHERE username = %s AND VALID_TO IS NULL"""

        row = []
        with analytical_query() as cursor:
            try:
                cursor.execute(query, (username,))
                row = cursor.fetchone()
            except Exception:
                conf.log.exception("Failed reading users surrogate key. %s" %
                                   username)

        if not row:
            return None

        self.sk_cache[username] = row[0]
        return row[0]
Ejemplo n.º 25
0
    def date_sk(self, sql_date_string):
        """ Map an SQL date/datetime string to its date_sk.

        :param sql_date_string: string whose first 10 chars are 'YYYY-MM-DD'
        :returns: date_sk value, or None when not found / query failed
        """
        # Parameterized query to avoid SQL injection via the date string.
        query = """
        SELECT date_sk FROM date_dim
        WHERE date = %s
        """

        row = []
        with analytical_query() as cursor:
            try:
                # Only the date part (first 10 characters) is matched
                cursor.execute(query, (sql_date_string[:10],))
                row = cursor.fetchone()
            except Exception:
                conf.log.exception("Getting date surrogate key failed. %s" % sql_date_string)

        if not row:
            return None

        return row[0]
Ejemplo n.º 26
0
    def _partitions(self):
        """ Return the integer ids of the partitions of self.table.

        Runs EXPLAIN PARTITIONS and parses the comma-separated partition
        column; returns an empty list when the query fails.
        """
        # NOTE(review): a table name cannot be bound as a parameter;
        # self.table is assumed to come from trusted code -- confirm callers.
        query = "EXPLAIN PARTITIONS SELECT * FROM %s" % self.table

        row = []

        with analytical_query() as cursor:
            try:
                cursor.execute(query)
                row = cursor.fetchone()
            except Exception:
                conf.log.exception("Failed reading partitions from the database")

        partition_names = []
        # Guard against fetchone() returning None (original crashed on len(None))
        if row and len(row) > 3:
            partition_names = row[3].split(',')

        # Assumes partition names like '<prefix>_<number>' -- the number is kept
        partitions = []
        for partition_name in partition_names:
            partitions.append(int(partition_name.split('_')[1]))
        return partitions
Ejemplo n.º 27
0
    def _partitions(self):
        """ Return the integer ids of the partitions of self.table.

        Runs EXPLAIN PARTITIONS and parses the comma-separated partition
        column; returns an empty list when the query fails.
        """
        # NOTE(review): a table name cannot be bound as a parameter;
        # self.table is assumed to come from trusted code -- confirm callers.
        query = "EXPLAIN PARTITIONS SELECT * FROM %s" % self.table

        row = []

        with analytical_query() as cursor:
            try:
                cursor.execute(query)
                row = cursor.fetchone()
            except Exception:
                conf.log.exception(
                    "Failed reading partitions from the database")

        partition_names = []
        # Guard against fetchone() returning None (original crashed on len(None))
        if row and len(row) > 3:
            partition_names = row[3].split(',')

        # Assumes partition names like '<prefix>_<number>' -- the number is kept
        partitions = []
        for partition_name in partition_names:
            partitions.append(int(partition_name.split('_')[1]))
        return partitions
Ejemplo n.º 28
0
    def date_sk(self, sql_date_string):
        """ Map an SQL date/datetime string to its date_sk.

        :param sql_date_string: string whose first 10 chars are 'YYYY-MM-DD'
        :returns: date_sk value, or None when not found / query failed
        """
        # Parameterized query to avoid SQL injection via the date string.
        query = """
        SELECT date_sk FROM date_dim
        WHERE date = %s
        """

        row = []
        with analytical_query() as cursor:
            try:
                # Only the date part (first 10 characters) is matched
                cursor.execute(query, (sql_date_string[:10],))
                row = cursor.fetchone()
            except Exception:
                conf.log.exception("Getting date surrogate key failed. %s" %
                                   sql_date_string)

        if not row:
            return None

        return row[0]
Ejemplo n.º 29
0
    def get_active_project_sk(self, identifier):
        """ Return the surrogate key of the active (VALID_TO IS NULL) project
            row for `identifier`, caching it in sk_cache. None when not found.
        """
        # Parameterized query to avoid SQL injection via the identifier.
        query = """
        SELECT project_sk
        FROM project_dim
        WHERE identifier = %s AND VALID_TO IS NULL"""

        row = []
        with analytical_query() as cursor:
            try:
                cursor.execute(query, (identifier,))
                row = cursor.fetchone()
            except Exception:
                conf.log.exception(
                    "Getting project sk failed. project identifier : %s" %
                    identifier)

        if not row:
            return None

        self.sk_cache[identifier] = row[0]
        return row[0]
Ejemplo n.º 30
0
    def _get_members_activity(self, starttime, endtime, max=0):
        """
        Fetch the most active users, based on user_activity information.

        :param datetime starttime: Start time of the query
        :param datetime endtime: End time of the query
        :param int max: Optional limiter of max entries. Zero means no limit (default)
        :returns: List of members, ordered by activity
        :rtype: list
        """
        # Optional LIMIT clause; empty string means unlimited
        limit_clause = 'LIMIT %d' % max if max else ''

        query = """
        SELECT
            CONVERT(SUM(ua.count), SIGNED INTEGER) AS count,
            ud.user_key AS id,
            ud.username AS username,
            ud.givenName AS given_name,
            ud.mail AS mail,
            u.last_login AS last_login,
            ud.lastName AS last_name
        FROM user_activity_fact AS ua
        LEFT JOIN user_dim AS ud ON ud.user_sk = ua.user_sk
        LEFT JOIN date_dim AS dd ON dd.date_sk = ua.date_sk
        LEFT JOIN {0}.user AS u ON u.user_id = ud.user_key
        WHERE (dd.date >= %s AND dd.date <= %s)
        GROUP BY ud.user_key
        ORDER BY count DESC
        {1}
        """.format(conf.db_admin_schema_name, limit_clause)

        # Collect one dict per user within the requested time range
        with analytical_query(cursors.DictCursor) as cursor:
            cursor.execute(query, (starttime, endtime))
            members = [entry for entry in cursor]

        return members
Ejemplo n.º 31
0
    def _get_members_activity(self, starttime, endtime, max=0):
        """
        Fetch the most active users, based on user_activity information.

        :param datetime starttime: Start time of the query
        :param datetime endtime: End time of the query
        :param int max: Optional limiter of max entries. Zero means no limit (default)
        :returns: List of members, ordered by activity
        :rtype: list
        """
        # Optional LIMIT clause; empty string means unlimited
        limit_clause = 'LIMIT %d' % max if max else ''

        query = """
        SELECT
            CONVERT(SUM(ua.count), SIGNED INTEGER) AS count,
            ud.user_key AS id,
            ud.username AS username,
            ud.givenName AS given_name,
            ud.mail AS mail,
            u.last_login AS last_login,
            ud.lastName AS last_name
        FROM user_activity_fact AS ua
        LEFT JOIN user_dim AS ud ON ud.user_sk = ua.user_sk
        LEFT JOIN date_dim AS dd ON dd.date_sk = ua.date_sk
        LEFT JOIN {0}.user AS u ON u.user_id = ud.user_key
        WHERE (dd.date >= %s AND dd.date <= %s)
        GROUP BY ud.user_key
        ORDER BY count DESC
        {1}
        """.format(conf.db_admin_schema_name, limit_clause)

        # Collect one dict per user within the requested time range
        with analytical_query(cursors.DictCursor) as cursor:
            cursor.execute(query, (starttime, endtime))
            members = [entry for entry in cursor]

        return members
Ejemplo n.º 32
0
    def get_active_discussion_sk(self, project_identifier, forum_id):
        """ Return the surrogate key of the active discussion row for the
            (project_identifier, forum_id) pair, caching it in sk_cache.
            None when not found.
        """
        # Parameterized query to avoid SQL injection via the identifier.
        query = """
        SELECT discussion_sk
        FROM discussion_dim
        WHERE project_identifier = %s AND forum_key = %s
        AND VALID_TO IS NULL"""

        row = []
        with analytical_query() as cursor:
            try:
                cursor.execute(query, (project_identifier, forum_id))
                row = cursor.fetchone()
            except Exception:
                conf.log.exception("Getting discussion surrogate key failed. {project: %s, forum: %d}" %
                                        (project_identifier, forum_id))

        if not row:
            return None

        cache_key = "%s.%d" % (project_identifier, forum_id)
        self.sk_cache[cache_key] = row[0]
        return row[0]
Ejemplo n.º 33
0
    def get_active_discussion_sk(self, project_identifier, forum_id):
        """ Return the surrogate key of the active discussion row for the
            (project_identifier, forum_id) pair, caching it in sk_cache.
            None when not found.
        """
        # Parameterized query to avoid SQL injection via the identifier.
        query = """
        SELECT discussion_sk
        FROM discussion_dim
        WHERE project_identifier = %s AND forum_key = %s
        AND VALID_TO IS NULL"""

        row = []
        with analytical_query() as cursor:
            try:
                cursor.execute(query, (project_identifier, forum_id))
                row = cursor.fetchone()
            except Exception:
                conf.log.exception(
                    "Getting discussion surrogate key failed. {project: %s, forum: %d}"
                    % (project_identifier, forum_id))

        if not row:
            return None

        cache_key = "%s.%d" % (project_identifier, forum_id)
        self.sk_cache[cache_key] = row[0]
        return row[0]
Ejemplo n.º 34
0
    def event_sk(self, action):
        """ Map an event/action name to its event surrogate key,
            using (and filling) key_cache. None when not found.
        """
        if action in self.key_cache:
            return self.key_cache[action]

        # Parameterized query to avoid SQL injection via the action name.
        query = "SELECT event_sk FROM event_dim WHERE action_name = %s AND VALID_TO IS NULL"
        row = []
        with analytical_query() as cursor:
            try:
                cursor.execute(query, (action,))
                row = cursor.fetchone()
            except Exception:
                conf.log.exception("Getting event surrogate key failed. %s" % action)

        if not row:
            return None

        event_sk = row[0]
        self.key_cache[action] = event_sk

        return event_sk
Ejemplo n.º 35
0
    def from_analytical(self, project_identifier, forum_id):
        """ Return discussion forum row from the analytical database.

        :param project_identifier: identifier of the owning project
        :param forum_id: forum key within the project
        :returns: dict of forum fields, or None when no active row exists
        """
        # Parameterized query: let the driver quote the values instead of
        # interpolating them into the SQL string (SQL injection risk).
        query = """
        SELECT forum_key, discussion_name, author, moderators, subscribers,
               subject, description, project_key, project_identifier, project_name
        FROM discussion_dim
        WHERE project_identifier = %s AND forum_key = %s
        AND VALID_TO IS NULL"""

        row = []
        with analytical_query() as cursor:
            try:
                # safe_string/safe_int kept for whatever normalization they do
                cursor.execute(query, (safe_string(project_identifier),
                                       safe_int(forum_id)))
                row = cursor.fetchone()
            except Exception:
                # Best-effort read: log and report the miss as None below
                conf.log.exception(
                    "Failed reading discussion forum from analytical db. {project: %s, forum: %d}"
                    % (project_identifier, forum_id))

        if not row:
            return None

        project = {
            'forum_key': row[0],
            'discussion_name': MySQLdb.escape_string(row[1]),
            'author': MySQLdb.escape_string(row[2]),
            'moderators': MySQLdb.escape_string(row[3]),
            'subscribers': MySQLdb.escape_string(row[4]),
            'subject': MySQLdb.escape_string(row[5]),
            'description': MySQLdb.escape_string(row[6]),
            'project_key': row[7],
            'project_identifier': MySQLdb.escape_string(row[8]),
            'project_name': MySQLdb.escape_string(row[9])
        }
        return project
Ejemplo n.º 36
0
    def event_sk(self, action):
        """ Map an event/action name to its event surrogate key,
            using (and filling) key_cache. None when not found.
        """
        if action in self.key_cache:
            return self.key_cache[action]

        # Parameterized query to avoid SQL injection via the action name.
        query = "SELECT event_sk FROM event_dim WHERE action_name = %s AND VALID_TO IS NULL"
        row = []
        with analytical_query() as cursor:
            try:
                cursor.execute(query, (action,))
                row = cursor.fetchone()
            except Exception:
                conf.log.exception("Getting event surrogate key failed. %s" %
                                   action)

        if not row:
            return None

        event_sk = row[0]
        self.key_cache[action] = event_sk

        return event_sk
Ejemplo n.º 37
0
    def _get_activity_statistics_per_project(self, starttime, endtime, event_type, project_ids, max=0):
        """
        Retrieves the activity statistics per each project, per each week
        Returns results in a list (one entry per each project)::

            [
                {'count':153, 'year':2012, 'week':1},
                {'count':125, 'year':2012, 'week':2},
                {'count':235, 'year':2012, 'week':3},
            ]

        :param datetime starttime: Start time of the query
        :param datetime endtime: End time of the query
        :param str event_type:
            Name of event type to fetch statistics from.
            See :py:data:`multiproject.home.admin.analytics.EVENT_DESCRIPTIONS`
        :param list project_ids: Optional list of project ids to limit down the results
        :param int max: Optional limiter of max entries. Zero means no limit (default)
        :returns:
            Result list, containing dict entry for each week. Data also may contain some project info, but that is
            not to be trusted.
        :rtype: list

        """
        projects = []
        results = []
        # Date-only boundaries for the date_dim join below
        startstr = starttime.strftime('%Y-%m-%d')
        endstr = endtime.strftime('%Y-%m-%d')

        # Find out the project ids that actually have data (instead of iterating all the projects)
        # NOTE(review): the IN list is built by string interpolation; the
        # project_ids are assumed to be integers from trusted code -- confirm
        # the callers before exposing this to user input.
        fquery = '''
        SELECT
            pd.project_key AS id,
            pd.identifier AS identifier,
            pd.project_name AS name,
            ef.event_sk AS event_sk
        FROM project_dim AS pd
        LEFT JOIN event_fact AS ef ON ef.project_sk = pd.project_sk
        WHERE
            pd.project_key IN ({0}) AND
            ef.timestamp >= %s AND
            ef.timestamp <= %s AND
            ef.event_sk = (
                SELECT  event_sk
                FROM  event_dim
                WHERE action_name =  %s
            )
        GROUP BY pd.project_key
        '''.format(','.join([str(pi) for pi in project_ids]))


        with analytical_query(cursors.DictCursor) as cursor:
            cursor.execute(fquery, (starttime, endtime, event_type))
            projects = [row for row in cursor]

        self.log.debug('Provided %d project ids, found %d interesting ones' % (len(project_ids), len(projects)))

        # SQL query for retrieving weekly statistics
        # ('%%' escapes the literal '%' for the driver's parameter formatting;
        # {0} is a per-project WHERE fragment filled in the loop below)
        query = '''
        SELECT
            COUNT(ef.event_sk) AS count,
            ef.event_sk,
            dd.week AS week,
            dd.year AS year,
            DATE_FORMAT(dd.date, '%%Y-%%m-%%d') AS date
        FROM event_fact AS ef
        RIGHT JOIN (
          SELECT
            date, week, year, date_sk
          FROM date_dim
          WHERE
           date_dim.date >= DATE(%s)
           AND date_dim.date <= DATE(%s)
        )
        AS dd ON dd.date_sk = ef.date_sk
        LEFT JOIN project_dim AS pd ON pd.project_sk = ef.project_sk
        WHERE (
          ef.event_sk = %s OR
          ef.event_sk IS NULL
        )
        {0}
        GROUP BY dd.week
        ORDER BY dd.date ASC
        '''

        # Open cursor that we'll be used for reading different tables
        with analytical_query(cursors.DictCursor) as cursor:

            # Iterate each project to fetch and group weekly results per project
            for project in projects:
                # Retrieve statistics for each week, for the specified only
                qwhere = 'AND (pd.project_key = %d OR pd.project_sk IS NULL)' % project['id']
                cursor.execute(query.format(qwhere), (startstr, endstr, project['event_sk']))
                presults = [row for row in cursor]

                # Update project dict with additional info like download counts per project
                project['weeks'] = presults
                project['count'] = sum([r['count'] for r in presults])

                results.append(project)

        # Sort the results based on project count key (as it cannot be be done in SQL)
        results = sorted(results, key=itemgetter('count'), reverse=True)

        # Limit down the top results if wanted
        if max:
            results = results[:max]

        return results
Ejemplo n.º 38
0
    def _get_activity_statistics_per_project(self, starttime, endtime, event_type, project_ids, max=0):
        """
        Retrieves the activity statistics per each project, per each week.
        Returns results in a list (one entry per each project); each project dict
        carries a ``weeks`` list of weekly entries and a ``count`` total::

            [
                {'count':153, 'year':2012, 'week':1},
                {'count':125, 'year':2012, 'week':2},
                {'count':235, 'year':2012, 'week':3},
            ]

        :param datetime starttime: Start time of the query
        :param datetime endtime: End time of the query
        :param str event_type:
            Name of event type to fetch statistics from.
            See :py:data:`multiproject.home.admin.analytics.EVENT_DESCRIPTIONS`
        :param list project_ids: Optional list of project ids to limit down the results
        :param int max: Optional limiter of max entries. Zero means no limit (default)
        :returns:
            Result list, containing a dict entry for each project. Data may also contain
            some project info, but that is not to be trusted.
        :rtype: list

        """
        projects = []
        results = []
        startstr = starttime.strftime('%Y-%m-%d')
        endstr = endtime.strftime('%Y-%m-%d')

        # Find out the project ids that actually have data (instead of iterating all the projects).
        # NOTE: an IN (...) list cannot be passed as a single execute() parameter, so each id is
        # forced through int() before being embedded — this rejects non-numeric (injectable) values.
        fquery = '''
        SELECT
            pd.project_key AS id,
            pd.identifier AS identifier,
            pd.project_name AS name,
            ef.event_sk AS event_sk
        FROM project_dim AS pd
        LEFT JOIN event_fact AS ef ON ef.project_sk = pd.project_sk
        WHERE
            pd.project_key IN ({0}) AND
            ef.timestamp >= %s AND
            ef.timestamp <= %s AND
            ef.event_sk = (
                SELECT  event_sk
                FROM  event_dim
                WHERE action_name =  %s
            )
        GROUP BY pd.project_key
        '''.format(','.join(str(int(pi)) for pi in project_ids))

        with analytical_query(cursors.DictCursor) as cursor:
            cursor.execute(fquery, (starttime, endtime, event_type))
            projects = list(cursor)

        self.log.debug('Provided %d project ids, found %d interesting ones' % (len(project_ids), len(projects)))

        # SQL query for retrieving weekly statistics. The per-project filter is a proper
        # execute() parameter (last %s) rather than being string-interpolated into the query.
        # %%Y-%%m-%%d is doubled because the driver applies %-substitution at execute() time.
        query = '''
        SELECT
            COUNT(ef.event_sk) AS count,
            ef.event_sk,
            dd.week AS week,
            dd.year AS year,
            DATE_FORMAT(dd.date, '%%Y-%%m-%%d') AS date
        FROM event_fact AS ef
        RIGHT JOIN (
          SELECT
            date, week, year, date_sk
          FROM date_dim
          WHERE
           date_dim.date >= DATE(%s)
           AND date_dim.date <= DATE(%s)
        )
        AS dd ON dd.date_sk = ef.date_sk
        LEFT JOIN project_dim AS pd ON pd.project_sk = ef.project_sk
        WHERE (
          ef.event_sk = %s OR
          ef.event_sk IS NULL
        )
        AND (pd.project_key = %s OR pd.project_sk IS NULL)
        GROUP BY dd.week
        ORDER BY dd.date ASC
        '''

        # Open cursor that will be used for reading different tables
        with analytical_query(cursors.DictCursor) as cursor:

            # Iterate each project to fetch and group weekly results per project
            for project in projects:
                # Retrieve statistics for each week, for the specified project only.
                # The IS NULL branches keep empty weeks present in the result.
                cursor.execute(query, (startstr, endstr, project['event_sk'], project['id']))
                presults = list(cursor)

                # Update project dict with additional info like event counts per project
                project['weeks'] = presults
                project['count'] = sum(r['count'] for r in presults)

                results.append(project)

        # Sort the results based on project count key (as it cannot be done in SQL)
        results = sorted(results, key=itemgetter('count'), reverse=True)

        # Limit down the top results if wanted
        if max:
            results = results[:max]

        return results
Ejemplo n.º 39
0
    def _get_activity_statistics_per_week(self, starttime, endtime, event_type, project_ids=None, max=0):
        """
        Retrieves the activity statistics for the given event type, aggregated per
        week across all matching projects. Returns a list with one entry per week::

            [
                {'count':153, 'year':2012, 'week':1},
                {'count':125, 'year':2012, 'week':2},
                {'count':235, 'year':2012, 'week':3},
            ]

        :param datetime starttime: Start time of the query
        :param datetime endtime: End time of the query
        :param str event_type:
            Name of event type to fetch statistics from.
            See :py:data:`multiproject.home.admin.analytics.EVENT_DESCRIPTIONS`
        :param list project_ids: Optional list of project ids to limit down the results
        :param int max: Optional limiter of max entries. Zero means no limit (default)
        :returns:
            Result list, containing a dict entry for each week. Data may also contain
            some project info, but that is not to be trusted.
        :rtype: list

        """
        start_date = starttime.strftime('%Y-%m-%d')
        end_date = endtime.strftime('%Y-%m-%d')

        # Build the optional project restriction; int() rejects non-numeric ids
        project_filter = ''
        if project_ids:
            id_list = ','.join(str(int(pid)) for pid in project_ids)
            project_filter = 'AND (pd.project_key IN (%s) OR pd.project_sk IS NULL)' % id_list

        # RIGHT JOIN against date_dim keeps weeks with no events in the output
        query = '''
        SELECT
            COUNT(ef.event_sk) AS count,
            pd.project_sk AS id,
            pd.project_name AS name,
            pd.identifier AS identifier,
            ef.event_sk,
            dd.week AS week,
            dd.year AS year,
            DATE_FORMAT(dd.date, '%%Y-%%m-%%d') AS date
        FROM event_fact AS ef
        RIGHT JOIN (
          SELECT
            date, week, year, date_sk
          FROM date_dim
          WHERE
           date_dim.date >= DATE(%s)
           AND date_dim.date <= DATE(%s)
        )
        AS dd ON dd.date_sk = ef.date_sk
        LEFT JOIN project_dim AS pd on pd.project_sk = ef.project_sk
        WHERE (
          ef.event_sk = (
            SELECT  event_sk
            FROM  event_dim
            WHERE action_name =  %s
          )
          OR ef.event_sk IS NULL
        )
        {0}
        GROUP BY dd.week
        ORDER BY dd.date ASC
        '''.format(project_filter)

        weekly = []
        with analytical_query(cursors.DictCursor) as cursor:
            cursor.execute(query, (start_date, end_date, event_type))
            weekly = list(cursor)

        # When limiting, take the busiest weeks but return them in date order
        # (this cannot be done in SQL)
        if max:
            weekly = sorted(weekly, key=itemgetter('count'), reverse=True)[:max]
            weekly = sorted(weekly, key=itemgetter('date'))

        return weekly
Ejemplo n.º 40
0
    def _get_activity_statistics_per_week(self, starttime, endtime, event_type, project_ids=None, max=0):
        """
        Retrieves weekly activity statistics for one event type, without separating
        the projects from each other. The result is a list with one dict per week::

            [
                {'count':153, 'year':2012, 'week':1},
                {'count':125, 'year':2012, 'week':2},
                {'count':235, 'year':2012, 'week':3},
            ]

        :param datetime starttime: Start time of the query
        :param datetime endtime: End time of the query
        :param str event_type:
            Name of event type to fetch statistics from.
            See :py:data:`multiproject.home.admin.analytics.EVENT_DESCRIPTIONS`
        :param list project_ids: Optional list of project ids to limit down the results
        :param int max: Optional limiter of max entries. Zero means no limit (default)
        :returns:
            Result list with a dict entry for each week. Entries may also carry some
            project info, but that is not to be trusted.
        :rtype: list

        """
        date_from = starttime.strftime('%Y-%m-%d')
        date_to = endtime.strftime('%Y-%m-%d')

        # Optional restriction to the given projects; ids are int()-validated
        if project_ids:
            where_extra = 'AND (pd.project_key IN (%s) OR pd.project_sk IS NULL)' % ','.join(
                str(int(pid)) for pid in project_ids)
        else:
            where_extra = ''

        # The date_dim RIGHT JOIN preserves weeks that have no events at all
        query = '''
        SELECT
            COUNT(ef.event_sk) AS count,
            pd.project_sk AS id,
            pd.project_name AS name,
            pd.identifier AS identifier,
            ef.event_sk,
            dd.week AS week,
            dd.year AS year,
            DATE_FORMAT(dd.date, '%%Y-%%m-%%d') AS date
        FROM event_fact AS ef
        RIGHT JOIN (
          SELECT
            date, week, year, date_sk
          FROM date_dim
          WHERE
           date_dim.date >= DATE(%s)
           AND date_dim.date <= DATE(%s)
        )
        AS dd ON dd.date_sk = ef.date_sk
        LEFT JOIN project_dim AS pd on pd.project_sk = ef.project_sk
        WHERE (
          ef.event_sk = (
            SELECT  event_sk
            FROM  event_dim
            WHERE action_name =  %s
          )
          OR ef.event_sk IS NULL
        )
        {0}
        GROUP BY dd.week
        ORDER BY dd.date ASC
        '''.format(where_extra)

        entries = []
        with analytical_query(cursors.DictCursor) as cursor:
            cursor.execute(query, (date_from, date_to, event_type))
            entries = list(cursor)

        # Limiting picks the top weeks by count, then restores chronological order
        # (this cannot be done in SQL)
        if max:
            top = sorted(entries, key=itemgetter('count'), reverse=True)[:max]
            entries = sorted(top, key=itemgetter('date'))

        return entries
 def applied(self):
     """
     Check whether the ``data_queue`` table exists in the analytical database.

     :returns: True if the table exists, otherwise False
     :rtype: bool
     """
     with analytical_query() as cursor:
         cursor.execute('SHOW TABLES LIKE "data_queue"')
         # fetchone() returns None when SHOW TABLES matched nothing; this replaces
         # the original loop-that-returns-on-first-row idiom with a direct check
         return cursor.fetchone() is not None