def from_analytical(self, project_identifier, forum_id):
    """
    Return discussion forum from the analytical database.

    :param str project_identifier: Project identifier
    :param int forum_id: Forum key in ``discussion_dim``
    :returns: Forum fields as a dict, or None when not found or the query fails
    """
    # Parameterized query instead of string interpolation: prevents SQL
    # injection and makes the safe_string/safe_int pre-escaping unnecessary.
    query = """
    SELECT forum_key, discussion_name, author, moderators, subscribers,
           subject, description, project_key, project_identifier, project_name
    FROM discussion_dim
    WHERE project_identifier = %s AND forum_key = %s AND VALID_TO IS NULL"""
    row = []
    with analytical_query() as cursor:
        try:
            cursor.execute(query, (project_identifier, forum_id))
            row = cursor.fetchone()
        except Exception:
            conf.log.exception(
                "Failed reading discussion forum from analytical db. {project: %s, forum: %d}" %
                (project_identifier, forum_id))
    if not row:
        return None
    # NOTE(review): escape_string on values *read* from the db looks odd, but
    # is kept as-is for backwards compatibility with existing callers.
    return {'forum_key': row[0],
            'discussion_name': MySQLdb.escape_string(row[1]),
            'author': MySQLdb.escape_string(row[2]),
            'moderators': MySQLdb.escape_string(row[3]),
            'subscribers': MySQLdb.escape_string(row[4]),
            'subject': MySQLdb.escape_string(row[5]),
            'description': MySQLdb.escape_string(row[6]),
            'project_key': row[7],
            'project_identifier': MySQLdb.escape_string(row[8]),
            'project_name': MySQLdb.escape_string(row[9])}
def from_analytical(self, identifier):
    """
    Return project from the analytical database.

    :param str identifier: Project identifier
    :returns: Project fields as a dict, or None when not found or the query fails
    """
    # Parameterized query: the original interpolated ``identifier`` directly
    # into the SQL string (SQL injection risk).
    query = """
    SELECT identifier, project_name, author, created, updated, published, project_key
    FROM project_dim
    WHERE identifier = %s AND VALID_TO IS NULL"""
    row = []
    with analytical_query() as cursor:
        try:
            cursor.execute(query, (identifier,))
            row = cursor.fetchone()
        except Exception:
            conf.log.exception("Getting project from analytical db failed. project identifier : %s" % identifier)
    if not row:
        return None
    return {'identifier': MySQLdb.escape_string(row[0]),
            'project_name': MySQLdb.escape_string(row[1]),
            'author': MySQLdb.escape_string(row[2]),
            'created': row[3],
            'updated': row[4],
            'published': row[5],
            'project_key': row[6]}
def _init_sk_cache(self):
    """
    Read all context surrogate keys from the database into the four
    lookup caches (home/project x prefix/exact).
    """
    with analytical_query() as cursor:
        try:
            query = """SELECT context_sk, context, environment_type, path_info FROM context_dim"""
            cursor.execute(query)
            for sk, context, environment_type, path_info in cursor:
                is_home = environment_type == 'home'
                is_prefix = path_info.endswith('.*')
                # A '.*' suffix marks a prefix match; strip it for the cache key
                key = path_info[:-2] if is_prefix else path_info
                if is_home:
                    cache = self.home_prefix_cache if is_prefix else self.home_exact_cache
                else:
                    cache = self.project_prefix_cache if is_prefix else self.project_exact_cache
                cache[key] = sk
        except Exception:
            # Narrowed from a bare except: so SystemExit/KeyboardInterrupt propagate
            conf.log.exception("Failed reading context sk")
def from_analytical(self, username):
    """
    Return user from the analytical database.

    :param str username: Username to look up
    :returns: User fields as a dict, or None when not found or the query fails
    """
    query = """
    SELECT username, mail, mobile, givenName, lastName, authentication, status, user_key
    FROM user_dim
    WHERE username = %s AND VALID_TO IS NULL
    """
    row = []
    with analytical_query() as cursor:
        try:
            # Pass parameters as a tuple: a bare string happens to work with
            # MySQLdb but is ambiguous and non-portable across DB-API drivers.
            cursor.execute(query, (username,))
            row = cursor.fetchone()
        except Exception:
            conf.log.exception(
                "Failed reading user from operational database. username : %s" % username)
    if not row:
        return None
    return {
        'username': row[0],
        'mail': row[1],
        'mobile': row[2],
        'givenName': row[3],
        'lastName': row[4],
        'authentication': row[5],
        'status': row[6],
        'user_key': row[7]
    }
def from_analytical(self, identifier):
    """
    Return project from the analytical database.

    :param str identifier: Project identifier
    :returns: Project fields as a dict, or None when not found or the query fails
    """
    # Parameterized query: the original interpolated ``identifier`` directly
    # into the SQL string (SQL injection risk).
    query = """
    SELECT identifier, project_name, author, created, updated, published, project_key
    FROM project_dim
    WHERE identifier = %s AND VALID_TO IS NULL"""
    row = []
    with analytical_query() as cursor:
        try:
            cursor.execute(query, (identifier,))
            row = cursor.fetchone()
        except Exception:
            conf.log.exception(
                "Getting project from analytical db failed. project identifier : %s" % identifier)
    if not row:
        return None
    return {
        'identifier': MySQLdb.escape_string(row[0]),
        'project_name': MySQLdb.escape_string(row[1]),
        'author': MySQLdb.escape_string(row[2]),
        'created': row[3],
        'updated': row[4],
        'published': row[5],
        'project_key': row[6]
    }
def from_analytical(self, username):
    """
    Return user from the analytical database.

    :param str username: Username to look up
    :returns: User fields as a dict, or None when not found or the query fails
    """
    query = """
    SELECT username, mail, mobile, givenName, lastName, authentication, status, user_key
    FROM user_dim
    WHERE username = %s AND VALID_TO IS NULL
    """
    row = []
    with analytical_query() as cursor:
        try:
            # Parameters as a tuple: bare-string args are MySQLdb-specific behavior
            cursor.execute(query, (username,))
            row = cursor.fetchone()
        except Exception:
            conf.log.exception("Failed reading user from operational database. username : %s" % username)
    if not row:
        return None
    return {
        'username': row[0],
        'mail': row[1],
        'mobile': row[2],
        'givenName': row[3],
        'lastName': row[4],
        'authentication': row[5],
        'status': row[6],
        'user_key': row[7]
    }
def _read_discussion_surrogate_keys(self):
    """
    Load the surrogate keys of all currently valid discussion rows
    into ``self.sk_cache`` keyed by ``self.cache_key(identifier, forum_key)``.
    """
    query = "SELECT project_identifier, forum_key, project_key FROM discussion_dim WHERE VALID_TO IS NULL"
    with analytical_query() as cursor:
        try:
            cursor.execute(query)
            for project_identifier, forum_key, project_key in cursor:
                self.sk_cache[self.cache_key(project_identifier, forum_key)] = project_key
        except Exception:
            # Narrowed from a bare except:
            conf.log.exception("Failed reading discussion surrogate keys")
def _read_project_surrogate_keys(self):
    """
    Load {identifier: project_sk} for all currently valid project rows
    into ``self.sk_cache``.
    """
    query = "SELECT identifier, project_sk FROM project_dim WHERE VALID_TO IS NULL"
    with analytical_query() as cursor:
        try:
            cursor.execute(query)
            for identifier, project_sk in cursor:
                self.sk_cache[identifier] = project_sk
        except Exception:
            # Narrowed from a bare except:
            conf.log.exception("Failed reading project surrogate keys")
def applied(self):
    """
    Check whether the 'Admin Project VCM' context row exists in context_dim.

    :returns: True when exactly one matching row exists, otherwise False
    """
    sql = '''
    SELECT COUNT(*) AS count
    FROM %s.context_dim
    WHERE context = 'Admin Project VCM'
    ''' % conf.db_analytical_schema_name
    with analytical_query() as cursor:
        cursor.execute(sql)
        count_row = cursor.fetchone()
        return count_row[0] == 1
def applied(self):
    # Returns True when the expected permission-related context rows are
    # already present in context_dim (i.e. this migration has been applied).
    with analytical_query() as cursor:
        cursor.execute('''
        SELECT COUNT(*) AS count
        FROM %s.context_dim
        WHERE (context IN ('Project permissions','Home permissions','Admin Users','Admin Groups', 'Group templates'))
        ''' % conf.db_analytical_schema_name)
        row = cursor.fetchone()
        # NOTE(review): only 5 context names are listed in the IN clause but the
        # check expects a COUNT of 6 -- confirm whether a context name is missing
        # from the list or the expected count is wrong.
        if row[0] == 6:
            return True
        return False
def _init_active_user_sks(self):
    """
    Populate ``self.sk_cache`` with {username: user_sk} for all active
    (VALID_TO IS NULL) users.
    """
    query = "SELECT username, user_sk FROM user_dim WHERE VALID_TO IS NULL"
    with analytical_query() as cursor:
        try:
            cursor.execute(query)
            for username, user_sk in cursor:
                self.sk_cache[username] = user_sk
        except Exception:
            # Narrowed from a bare except:
            conf.log.exception("Failed reading user surrogate keys with query '%s'" % query)
def applied(self):
    """
    Check if column exists or not

    :returns: True if exists, otherwise False
    """
    with analytical_query() as cursor:
        cursor.execute('SELECT action_name FROM %s.event_dim' % conf.db_analytical_schema_name)
        known_actions = [record[0] for record in cursor]
    # Equivalent (De Morgan) to the original ``not all(action not in ...)``:
    # True as soon as at least one configured action already exists in event_dim.
    return any(action in known_actions for action, crud, context in self.actions)
def _read_data_management_plans(self):
    """
    Read the current data management plan table into
    ``self.data_management_plans`` as TableDataManagementPlan objects.
    """
    self.data_management_plans = []
    # `table` is a reserved word in MySQL, hence the backticks
    query = ("SELECT `table`, buffer_partitions, partition_size, partition_count, archive_table, "
             "VALID_FROM, VALID_TO FROM data_management")
    with analytical_query() as cursor:
        try:
            cursor.execute(query)
            for row in cursor.fetchall():
                self.data_management_plans.append(TableDataManagementPlan(row))
        except Exception:
            # Narrowed from a bare except:
            conf.log.exception("Failed reading data management plans")
def _init_active_user_sks(self):
    """
    Populate ``self.sk_cache`` with {username: user_sk} for all active
    (VALID_TO IS NULL) users.
    """
    query = "SELECT username, user_sk FROM user_dim WHERE VALID_TO IS NULL"
    with analytical_query() as cursor:
        try:
            cursor.execute(query)
            for username, user_sk in cursor:
                self.sk_cache[username] = user_sk
        except Exception:
            # Narrowed from a bare except:
            conf.log.exception(
                "Failed reading user surrogate keys with query '%s'" % query)
def _inapplicable_sk_row(self):
    """
    Fetch the surrogate key row for the placeholder '<Inapplicable>' discussion.

    :returns: the (discussion_sk,) row, None when no such row exists,
              or [] when the query fails
    """
    query = """
    SELECT discussion_sk
    FROM discussion_dim
    WHERE project_identifier = '<Inapplicable>' AND discussion_name = '<Inapplicable>'"""
    row = []
    with analytical_query() as cursor:
        try:
            cursor.execute(query)
            row = cursor.fetchone()
        except Exception:
            # Narrowed from a bare except:
            conf.log.exception("Getting inapplicable discussion sk failed")
    return row
def applied(self):
    """
    Check if column exists or not

    :returns: True if exists, otherwise False
    """
    with analytical_query() as cursor:
        cursor.execute('SELECT action_name FROM %s.event_dim' % conf.db_analytical_schema_name)
        known_actions = [record[0] for record in cursor]
    # Equivalent (De Morgan) to the original ``not all(action not in ...)``:
    # True as soon as at least one configured action already exists in event_dim.
    return any(action in known_actions for action in self.actions.keys())
def _read_data_management_plans(self):
    """
    Read the current data management plan table into
    ``self.data_management_plans`` as TableDataManagementPlan objects.
    """
    self.data_management_plans = []
    # `table` is a reserved word in MySQL, hence the backticks
    query = (
        "SELECT `table`, buffer_partitions, partition_size, partition_count, archive_table, "
        "VALID_FROM, VALID_TO FROM data_management")
    with analytical_query() as cursor:
        try:
            cursor.execute(query)
            for row in cursor.fetchall():
                self.data_management_plans.append(
                    TableDataManagementPlan(row))
        except Exception:
            # Narrowed from a bare except:
            conf.log.exception("Failed reading data management plans")
def get_active_project_sk(self, identifier):
    """
    Fetch (and cache in ``self.sk_cache``) the surrogate key of the
    active project row.

    :param str identifier: Project identifier
    :returns: project_sk or None when not found / query fails
    """
    # Parameterized to avoid SQL injection via identifier
    query = """
    SELECT project_sk
    FROM project_dim
    WHERE identifier = %s AND VALID_TO IS NULL"""
    row = []
    with analytical_query() as cursor:
        try:
            cursor.execute(query, (identifier,))
            row = cursor.fetchone()
        except Exception:
            conf.log.exception("Getting project sk failed. project identifier : %s" % identifier)
    if not row:
        return None
    self.sk_cache[identifier] = row[0]
    return row[0]
def get_active_user_sk(self, username):
    """
    Fetch (and cache in ``self.sk_cache``) the surrogate key of the
    active user row.

    :param str username: Username to look up
    :returns: user_sk or None when not found / query fails
    """
    # BUG FIX: the query previously compared against the literal string
    # '******' while still applying the ``%`` operator to it, which raised
    # TypeError ("not all arguments converted") on every call. Use a
    # parameterized query instead (also avoids SQL injection).
    query = """
    SELECT user_sk
    FROM user_dim
    WHERE username = %s AND VALID_TO IS NULL"""
    row = []
    with analytical_query() as cursor:
        try:
            cursor.execute(query, (username,))
            row = cursor.fetchone()
        except Exception:
            conf.log.exception("Failed reading users surrogate key. %s" % username)
    if not row:
        return None
    self.sk_cache[username] = row[0]
    return row[0]
def date_sk(self, sql_date_string):
    """
    Map an SQL date/datetime string to its date_dim surrogate key.

    :param str sql_date_string: date string; only the first 10 characters
        (YYYY-MM-DD) are used, so datetime strings are accepted too
    :returns: date_sk or None when not found / query fails
    """
    # Parameterized to avoid SQL injection via the date string
    query = """
    SELECT date_sk
    FROM date_dim
    WHERE date = %s
    """
    row = []
    with analytical_query() as cursor:
        try:
            cursor.execute(query, (sql_date_string[:10],))
            row = cursor.fetchone()
        except Exception:
            conf.log.exception("Getting date surrogate key failed. %s" % sql_date_string)
    if not row:
        return None
    return row[0]
def _partitions(self):
    """
    Return the partition numbers currently used by ``self.table``.

    :returns: list of ints parsed from partition names of the form ``<name>_<n>``
    """
    query = "EXPLAIN PARTITIONS SELECT * FROM %s" % self.table
    row = []
    with analytical_query() as cursor:
        try:
            cursor.execute(query)
            row = cursor.fetchone()
        except Exception:
            # Narrowed from a bare except:
            conf.log.exception("Failed reading partitions from the database")
    partition_names = []
    # Guard against fetchone() returning None as well as the partitions
    # column (row[3]) being NULL for a non-partitioned table; the original
    # crashed with TypeError in both cases.
    if row and len(row) > 3 and row[3]:
        partition_names = row[3].split(',')
    partitions = []
    for partition_name in partition_names:
        partitions.append(int(partition_name.split('_')[1]))
    return partitions
def _partitions(self):
    """
    Return the partition numbers currently used by ``self.table``.

    :returns: list of ints parsed from partition names of the form ``<name>_<n>``
    """
    query = "EXPLAIN PARTITIONS SELECT * FROM %s" % self.table
    row = []
    with analytical_query() as cursor:
        try:
            cursor.execute(query)
            row = cursor.fetchone()
        except Exception:
            # Narrowed from a bare except:
            conf.log.exception(
                "Failed reading partitions from the database")
    partition_names = []
    # Guard against fetchone() returning None as well as the partitions
    # column (row[3]) being NULL for a non-partitioned table; the original
    # crashed with TypeError in both cases.
    if row and len(row) > 3 and row[3]:
        partition_names = row[3].split(',')
    partitions = []
    for partition_name in partition_names:
        partitions.append(int(partition_name.split('_')[1]))
    return partitions
def get_active_project_sk(self, identifier):
    """
    Fetch (and cache in ``self.sk_cache``) the surrogate key of the
    active project row.

    :param str identifier: Project identifier
    :returns: project_sk or None when not found / query fails
    """
    # Parameterized to avoid SQL injection via identifier
    query = """
    SELECT project_sk
    FROM project_dim
    WHERE identifier = %s AND VALID_TO IS NULL"""
    row = []
    with analytical_query() as cursor:
        try:
            cursor.execute(query, (identifier,))
            row = cursor.fetchone()
        except Exception:
            conf.log.exception(
                "Getting project sk failed. project identifier : %s" % identifier)
    if not row:
        return None
    self.sk_cache[identifier] = row[0]
    return row[0]
def _get_members_activity(self, starttime, endtime, max=0):
    """
    Implements the query: fetch most active (based on user_activity information) users

    :param datetime starttime: Start time of the query
    :param datetime endtime: End time of the query
    :param int max: Optional limiter of max entries. Zero means no limit (default)
    :returns: List of members, ordered by activity
    :rtype: list
    """
    limit_clause = 'LIMIT %d' % max if max else ''
    query = """
    SELECT
        CONVERT(SUM(ua.count), SIGNED INTEGER) AS count,
        ud.user_key AS id,
        ud.username AS username,
        ud.givenName AS given_name,
        ud.mail AS mail,
        u.last_login AS last_login,
        ud.lastName AS last_name
    FROM user_activity_fact AS ua
    LEFT JOIN user_dim AS ud ON ud.user_sk = ua.user_sk
    LEFT JOIN date_dim AS dd ON dd.date_sk = ua.date_sk
    LEFT JOIN {0}.user AS u ON u.user_id = ud.user_key
    WHERE (dd.date >= %s AND dd.date <= %s)
    GROUP BY ud.user_key
    ORDER BY count DESC
    {1}
    """.format(conf.db_admin_schema_name, limit_clause)

    # Fetch all the matching users within the given time range only
    with analytical_query(cursors.DictCursor) as cursor:
        cursor.execute(query, (starttime, endtime))
        results = list(cursor)
    return results
def get_active_discussion_sk(self, project_identifier, forum_id):
    """
    Fetch (and cache in ``self.sk_cache``) the surrogate key of the
    active discussion forum row.

    :param str project_identifier: Project identifier
    :param int forum_id: Forum key
    :returns: discussion_sk or None when not found / query fails
    """
    # Parameterized to avoid SQL injection via project_identifier
    query = """
    SELECT discussion_sk
    FROM discussion_dim
    WHERE project_identifier = %s AND forum_key = %s AND VALID_TO IS NULL"""
    row = []
    with analytical_query() as cursor:
        try:
            cursor.execute(query, (project_identifier, forum_id))
            row = cursor.fetchone()
        except Exception:
            conf.log.exception("Getting discussion surrogate key failed. {project: %s, forum: %d}" %
                               (project_identifier, forum_id))
    if not row:
        return None
    cache_key = "%s.%d" % (project_identifier, forum_id)
    self.sk_cache[cache_key] = row[0]
    return row[0]
def get_active_discussion_sk(self, project_identifier, forum_id):
    """
    Fetch (and cache in ``self.sk_cache``) the surrogate key of the
    active discussion forum row.

    :param str project_identifier: Project identifier
    :param int forum_id: Forum key
    :returns: discussion_sk or None when not found / query fails
    """
    # Parameterized to avoid SQL injection via project_identifier
    query = """
    SELECT discussion_sk
    FROM discussion_dim
    WHERE project_identifier = %s AND forum_key = %s AND VALID_TO IS NULL"""
    row = []
    with analytical_query() as cursor:
        try:
            cursor.execute(query, (project_identifier, forum_id))
            row = cursor.fetchone()
        except Exception:
            conf.log.exception(
                "Getting discussion surrogate key failed. {project: %s, forum: %d}" %
                (project_identifier, forum_id))
    if not row:
        return None
    cache_key = "%s.%d" % (project_identifier, forum_id)
    self.sk_cache[cache_key] = row[0]
    return row[0]
def event_sk(self, action):
    """
    Map an event/action name to its event surrogate key (memoized in
    ``self.key_cache``).

    :param str action: action name in event_dim
    :returns: event_sk or None when not found / query fails
    """
    if action in self.key_cache:
        return self.key_cache[action]
    # Parameterized to avoid SQL injection via action
    query = "SELECT event_sk FROM event_dim WHERE action_name = %s AND VALID_TO IS NULL"
    row = []
    with analytical_query() as cursor:
        try:
            cursor.execute(query, (action,))
            row = cursor.fetchone()
        except Exception:
            conf.log.exception("Getting event surrogate key failed. %s" % action)
    if not row:
        return None
    event_sk = row[0]
    self.key_cache[action] = event_sk
    return event_sk
def from_analytical(self, project_identifier, forum_id):
    """
    Return discussion forum from the analytical database.

    :param str project_identifier: Project identifier
    :param int forum_id: Forum key in ``discussion_dim``
    :returns: Forum fields as a dict, or None when not found or the query fails
    """
    # Parameterized query instead of string interpolation: prevents SQL
    # injection and makes the safe_string/safe_int pre-escaping unnecessary.
    query = """
    SELECT forum_key, discussion_name, author, moderators, subscribers,
           subject, description, project_key, project_identifier, project_name
    FROM discussion_dim
    WHERE project_identifier = %s AND forum_key = %s AND VALID_TO IS NULL"""
    row = []
    with analytical_query() as cursor:
        try:
            cursor.execute(query, (project_identifier, forum_id))
            row = cursor.fetchone()
        except Exception:
            conf.log.exception(
                "Failed reading discussion forum from analytical db. {project: %s, forum: %d}" %
                (project_identifier, forum_id))
    if not row:
        return None
    # NOTE(review): escape_string on values *read* from the db looks odd, but
    # is kept as-is for backwards compatibility with existing callers.
    return {
        'forum_key': row[0],
        'discussion_name': MySQLdb.escape_string(row[1]),
        'author': MySQLdb.escape_string(row[2]),
        'moderators': MySQLdb.escape_string(row[3]),
        'subscribers': MySQLdb.escape_string(row[4]),
        'subject': MySQLdb.escape_string(row[5]),
        'description': MySQLdb.escape_string(row[6]),
        'project_key': row[7],
        'project_identifier': MySQLdb.escape_string(row[8]),
        'project_name': MySQLdb.escape_string(row[9])
    }
def _get_activity_statistics_per_project(self, starttime, endtime, event_type, project_ids, max=0):
    """
    Retrieves the activity statistics per each project, per each week

    Returns results in a list (one entry per each project)::

        [
            {'count':153, 'year':2012, 'week':1},
            {'count':125, 'year':2012, 'week':2},
            {'count':235, 'year':2012, 'week':3},
        ]

    :param datetime starttime: Start time of the query
    :param datetime endtime: End time of the query
    :param str event_type:
        Name of event type to fetch statistics from.
        See :py:data:`multiproject.home.admin.analytics.EVENT_DESCRIPTIONS`
    :param list project_ids: Optional list of project ids to limit down the results
    :param int max: Optional limiter of max entries. Zero means no limit (default)
    :returns:
        Result list, containing dict entry for each week. Data also may contain
        some project info, but that is no to be trusted.
    :rtype: list
    """
    projects = []
    results = []
    # Date-only strings used by the weekly query (time part dropped)
    startstr = starttime.strftime('%Y-%m-%d')
    endstr = endtime.strftime('%Y-%m-%d')

    # Find out the project ids that actually have data (instead of iterating all the projects)
    # NOTE(review): project_ids are interpolated into the IN clause via str();
    # callers are expected to pass integer ids only -- confirm upstream validation.
    fquery = '''
    SELECT
        pd.project_key AS id,
        pd.identifier AS identifier,
        pd.project_name AS name,
        ef.event_sk AS event_sk
    FROM project_dim AS pd
    LEFT JOIN event_fact AS ef ON ef.project_sk = pd.project_sk
    WHERE
        pd.project_key IN ({0})
        AND ef.timestamp >= %s
        AND ef.timestamp <= %s
        AND ef.event_sk = (
            SELECT event_sk
            FROM event_dim
            WHERE action_name = %s
        )
    GROUP BY pd.project_key
    '''.format(','.join([str(pi) for pi in project_ids]))

    with analytical_query(cursors.DictCursor) as cursor:
        cursor.execute(fquery, (starttime, endtime, event_type))
        projects = [row for row in cursor]

    self.log.debug('Provided %d project ids, found %d interesting ones' % (len(project_ids), len(projects)))

    # SQL query for retrieving weekly statistics; {0} is filled with a
    # per-project WHERE fragment below. The RIGHT JOIN against date_dim keeps
    # weeks with zero events in the result.
    query = '''
    SELECT
        COUNT(ef.event_sk) AS count,
        ef.event_sk,
        dd.week AS week,
        dd.year AS year,
        DATE_FORMAT(dd.date, '%%Y-%%m-%%d') AS date
    FROM event_fact AS ef
    RIGHT JOIN (
        SELECT date, week, year, date_sk
        FROM date_dim
        WHERE date_dim.date >= DATE(%s) AND date_dim.date <= DATE(%s)
    ) AS dd ON dd.date_sk = ef.date_sk
    LEFT JOIN project_dim AS pd ON pd.project_sk = ef.project_sk
    WHERE ( ef.event_sk = %s OR ef.event_sk IS NULL ) {0}
    GROUP BY dd.week
    ORDER BY dd.date ASC
    '''

    # Open cursor that we'll be used for reading different tables
    with analytical_query(cursors.DictCursor) as cursor:
        # Iterate each project to fetch and group weekly results per project
        for project in projects:
            # Retrieve statistics for each week, for the specified only
            qwhere = 'AND (pd.project_key = %d OR pd.project_sk IS NULL)' % project['id']
            cursor.execute(query.format(qwhere), (startstr, endstr, project['event_sk']))
            presults = [row for row in cursor]

            # Update project dict with additional info like download counts per project
            project['weeks'] = presults
            project['count'] = sum([r['count'] for r in presults])
            results.append(project)

    # Sort the results based on project count key (as it cannot be be done in SQL)
    results = sorted(results, key=itemgetter('count'), reverse=True)

    # Limit down the top results if wanted
    if max:
        results = results[:max]

    return results
def _get_activity_statistics_per_week(self, starttime, endtime, event_type, project_ids=None, max=0):
    """
    Retrieves the download statistics per each week, without making separation between
    the projects

    Returns results in a list (one entry per each week)::

        [
            {'count':153, 'year':2012, 'week':1},
            {'count':125, 'year':2012, 'week':2},
            {'count':235, 'year':2012, 'week':3},
        ]

    :param datetime starttime: Start time of the query
    :param datetime endtime: End time of the query
    :param str event_type:
        Name of event type to fetch statistics from.
        See :py:data:`multiproject.home.admin.analytics.EVENT_DESCRIPTIONS`
    :param list project_ids: Optional list of project ids to limit down the results
    :param int max: Optional limiter of max entries. Zero means no limit (default)
    :returns:
        Result list, containing dict entry for each week. Data also may contain
        some project info, but that is no to be trusted.
    :rtype: list
    """
    qwhere = ''
    # Date-only strings (time part dropped) for the date_dim range filter
    startstr = starttime.strftime('%Y-%m-%d')
    endstr = endtime.strftime('%Y-%m-%d')

    # Optional limit for specified projects; ids are forced through int()
    # before interpolation, which prevents SQL injection here
    if project_ids:
        qwhere = 'AND (pd.project_key IN (%s) OR pd.project_sk IS NULL)' % ','.join([str(int(pid)) for pid in project_ids])

    # The RIGHT JOIN against the date_dim subquery keeps weeks with zero
    # events in the result set
    query = '''
    SELECT
        COUNT(ef.event_sk) AS count,
        pd.project_sk AS id,
        pd.project_name AS name,
        pd.identifier AS identifier,
        ef.event_sk,
        dd.week AS week,
        dd.year AS year,
        DATE_FORMAT(dd.date, '%%Y-%%m-%%d') AS date
    FROM event_fact AS ef
    RIGHT JOIN (
        SELECT date, week, year, date_sk
        FROM date_dim
        WHERE date_dim.date >= DATE(%s) AND date_dim.date <= DATE(%s)
    ) AS dd ON dd.date_sk = ef.date_sk
    LEFT JOIN project_dim AS pd on pd.project_sk = ef.project_sk
    WHERE (
        ef.event_sk = (
            SELECT event_sk
            FROM event_dim
            WHERE action_name = %s
        )
        OR ef.event_sk IS NULL
    ) {0}
    GROUP BY dd.week
    ORDER BY dd.date ASC
    '''.format(qwhere)

    results = []
    with analytical_query(cursors.DictCursor) as cursor:
        cursor.execute(query, (startstr, endstr, event_type))
        results = [row for row in cursor]

    # Sort the results based on project count key (as it cannot be be done in SQL)
    if max:
        # Take top results but keep still the date order
        results = sorted(results, key=itemgetter('count'), reverse=True)
        results = results[:max]
        results = sorted(results, key=itemgetter('date'))

    return results
def applied(self):
    """
    Check whether the 'data_queue' table already exists.

    :returns: True when the table is present, otherwise False
    """
    with analytical_query() as cursor:
        cursor.execute('SHOW TABLES LIKE "data_queue"')
        # fetchone() is None when SHOW TABLES matched nothing
        return cursor.fetchone() is not None