def tickets_assigned_in_interval(sw: SnowflakeWrapper, developer_id: str, interval: Interval) -> pd.DataFrame:
    sql = (
        "SELECT "
        "    i.KEY, "
        "    i.FIELDS:priority:name::string IssuePriority, "
        "    i.FIELDS:issuetype:name::string IssueType, "
        "    i.FIELDS:status:name::string status, "
        "    SUM(1) DAYS_ASSIGNED "
        "FROM ( "
    )
    days = []
    for i in range((interval.toDate(raw=True) - interval.fromDate(raw=True)).days + 1):
        day = interval.fromDate(raw=True) + timedelta(days=i)
        sql_date = Interval.strdate(day)
        days.append(
            f" SELECT "
            f"     DISTINCT KEY "
            f" FROM TIMELINES "
            f" WHERE "
            f"     ASSIGNEE IN ('{developer_id}') "
            f"     AND ( "
            f"         {sql_date} BETWEEN DATEFROM AND DATETO "
            f"         OR ( "
            f"             DATEDIFF('day', {sql_date}, DATEFROM) = 0 "
            f"             AND DATEDIFF('day', {sql_date}, DATETO) > 1 "
            f"         ) "
            f"     ) "
        )
    sql += " UNION ALL ".join(days) + ") t INNER JOIN ISSUES i ON t.KEY = i.KEY GROUP BY 1, 2, 3, 4 ORDER BY 5, 1;"
    # print(sql)
    return sw.fetch_df(sql)
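# The per-day expansion above turns the closed date range [fromDate, toDate] into
# one UNION ALL branch per day. A minimal, dependency-free sketch of that
# enumeration (illustrative only; `_days_in_range` is not referenced elsewhere in
# this module, and the real loop works off Interval's raw dates):
from datetime import date, timedelta  # likely already imported above; repeated so the sketch is self-contained


def _days_in_range(start: date, end: date) -> list:
    """Both endpoints inclusive, mirroring the `.days + 1` loop bound above.

    E.g. _days_in_range(date(2020, 1, 30), date(2020, 2, 1)) ->
    [date(2020, 1, 30), date(2020, 1, 31), date(2020, 2, 1)]
    """
    return [start + timedelta(days=i) for i in range((end - start).days + 1)]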
def get_authored_activity(sw: SnowflakeWrapper, interval: Interval, user_id: Union[None, list] = None):
    """
    Includes comments and changelogs where the [optional] user_id has performed the changes.

    :param sw: SnowflakeWrapper
    :param interval: [start, end)
    :param user_id: list of user IDs [if not given, gets all users]
    :return:
    """
    ids = "" if user_id is None else f" USERID IN ({mask_in(user_id)}) AND "
    return sw.fetch_df(
        f"SELECT "
        f"    USERID, "
        f"    ARRAY_AGG( "
        f"        OBJECT_CONSTRUCT( "
        f"            'key', KEY, "
        f"            'dateCreated', DATECREATED, "
        f"            'changelogItem', CHANGELOGITEM "
        f"        ) "
        f"    ) CHANGELOGITEMS "
        f"FROM CHANGELOGS "
        f"WHERE "
        f"    {ids} "
        f"    DATECREATED >= {interval.fromDate()} AND "
        f"    DATECREATED < {interval.toDate()} "
        f"GROUP BY USERID;"
    )
def tickets_assigned_per_day(sw: SnowflakeWrapper, developer_id: str, interval: Interval) -> pd.DataFrame:
    sql = (
        "SELECT "
        "    DAY_ASSIGNED, "
        "    KEY "
        "FROM ( "
    )
    days = []
    for i in range((interval.toDate(raw=True) - interval.fromDate(raw=True)).days + 1):
        day = interval.fromDate(raw=True) + timedelta(days=i)
        sql_date = Interval.strdate(day)
        days.append(
            f" SELECT "
            f"     TO_DATE({sql_date}) AS DAY_ASSIGNED, "
            f"     KEY "
            f" FROM TIMELINES "
            f" WHERE "
            f"     ASSIGNEE IN ('{developer_id}') "
            f"     AND ( "
            f"         {sql_date} BETWEEN DATEFROM AND DATETO "
            f"         OR ( "
            f"             DATEDIFF('day', {sql_date}, DATEFROM) = 0 "
            f"             AND DATEDIFF('day', {sql_date}, DATETO) > 1 "
            f"         ) "
            f"     ) "
            f" GROUP BY 1, 2 "
        )
    sql += " UNION ALL ".join(days) + ") GROUP BY 1, 2 ORDER BY 1;"
    # print(sql)
    return sw.fetch_df(sql)
def get_ticket_counts(sw: SnowflakeWrapper, breakdowns: Union[list, None] = None) -> DataFrame:
    if breakdowns is None:
        breakdowns = ["issueType", "issuePriority", "resolved"]
    breakdown = {
        "issueType": "i.fields:issuetype:name::string issueType",
        "issuePriority": "i.fields:priority:name::string issuePriority",
        # "resolved": "IFF(i.fields:resolutiondate IS NULL, 'No', 'Yes') Resolved"
        "resolved": "IFF(i.FIELDS:resolution IS NULL AND i.FIELDS:status:name NOT IN ('Done', 'Cancelled', 'Live', 'Reverted'), 'No', 'Yes') Resolved",
    }
    dimensions = [breakdown[by] for by in breakdowns]
    group_by = ','.join([str(i + 1) for i, _ in enumerate(dimensions)])
    sql = (
        f"SELECT "
        f"    {','.join(dimensions)}, "
        f"    count(*) Count "
        f"FROM "
        f"    ISSUES i "
        f"GROUP BY {group_by} "
        f"ORDER BY {len(dimensions) + 1}, {group_by} DESC; "
    )
    # print(sql)
    return sw.fetch_df(sql)
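# Example call (a sketch; assumes `sw` is an initialized SnowflakeWrapper):
# ticket counts broken down by type and resolution only. The GROUP BY clause is
# built from column ordinals, i.e. "GROUP BY 1,2" for two dimensions.
#
#     counts = get_ticket_counts(sw, breakdowns=["issueType", "resolved"])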
def get_developer_ids(sw: SnowflakeWrapper) -> list:
    return sw.fetch_df(
        "SELECT USERKEY KEY "
        "FROM USERS "
        "WHERE ACTIVE = TRUE AND USERKEY NOT ILIKE '%addon%' AND USERKEY NOT ILIKE 'ID%' "
        "ORDER BY 1;"
    )['KEY'].tolist()
def get_unresolved_ticket_counts(
        sw: SnowflakeWrapper,
        interval: Interval,
        all_unresolved_until: bool = False,  # disregards the interval.fromDate()
        breakdowns: Union[list, None] = None,
) -> DataFrame:
    if breakdowns is None:
        breakdowns = ["issueType", "issuePriority"]
    breakdown = {
        "issueType": "fields:issuetype:name::string issueType",
        "issuePriority": "fields:priority:name::string issuePriority",
        "status": "FIELDS:status:name::string status",
    }
    dimensions = [breakdown[by] for by in breakdowns]
    date_from = "" if all_unresolved_until else f"AND DateCreated >= TO_DATE({interval.fromDate()})"
    sql = (
        f"SELECT KEY, "
        f"    {','.join(dimensions)}, "
        f"    convert_timezone('UTC', to_timestamp_tz(FIELDS:created::string, "
        f"        'YYYY-MM-DD\"T\"HH24:MI:SS.FF TZHTZM')) DateCreated, "
        f"    DATEDIFF('day', DateCreated, {interval.toDate()}) daysUnresolved "
        f"FROM ISSUES "
        f"WHERE "
        f"    FIELDS:resolution IS NULL "
        f"    AND FIELDS:status:name::string NOT IN ('Done', 'Cancelled', 'Live', 'Reverted') "
        f"    {date_from} "
        f"    AND DateCreated < TO_DATE({interval.toDate()}) "
        f"ORDER BY {len(dimensions) + 3}; "
    )
    # print(sql)
    return sw.fetch_df(sql)
def tickets_active_on_interval(sw: SnowflakeWrapper, interval: Interval, cols=None) -> Union[list, pd.DataFrame]:
    get_keys_sql = (
        f"SELECT "
        f"    KEY "
        f"FROM "
        f"    CHANGELOGS "
        f"WHERE "
        f"    DATECREATED >= {interval.fromDate()} AND "
        f"    DATECREATED < {interval.toDate()} "
        f"GROUP BY KEY"
    )
    if cols is None:
        return sw.fetch_df(get_keys_sql)["KEY"].tolist()
    return sw.fetch_df(
        f"SELECT "
        f"    {utils.mask_props(cols)} "
        f"FROM "
        f"    ISSUES "
        f"WHERE "
        f"    KEY IN ({get_keys_sql});"
    )
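# Usage note (a sketch; `sw` is an initialized SnowflakeWrapper): calling
# tickets_active_on_interval(sw, interval) returns just the active ticket keys
# as a list, while passing `cols` (column specs understood by utils.mask_props)
# reuses the same key subquery to return the matching ISSUES rows as a DataFrame.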
def get_tickets(sw: SnowflakeWrapper, interval: Interval, resolved: bool = True, max_days: int = 10) -> DataFrame:
    resolution_date = convert_date('i.FIELDS:resolutiondate')
    # Fall back to a no-op predicate when resolved=False so the "AND" below stays valid SQL.
    where_resolution = (
        f"{resolution_date} >= {interval.fromDate()} AND {resolution_date} < {interval.toDate()}"
        if resolved else "1 = 1"
    )
    max_duration = f" AVG_DAYS <= {min(30, max_days)} " if max_days > 1 else " AVG_HOUR < 10 "
    sql = (
        f'SELECT TICKET_KEY, '
        f'    STATUS, '
        f'    "IssueType", '
        f'    "IssuePriority", '
        f'    "States", '
        f'    "Transitions", '
        f'    "DegreeOfCycling", '
        f'    AVG_DAYS, '
        f'    MAX_DAYS, '
        f'    MIN_DAYS, '
        f'    AVG_HOUR, '
        f'    MAX_HOURS, '
        f'    MIN_HOURS '
        f'FROM (SELECT t.KEY TICKET_KEY, '
        f'          STATUS, '
        f'          i.FIELDS:issuetype:name::string "IssueType", '
        f'          i.FIELDS:priority:name::string "IssuePriority", '
        f'          COUNT(DISTINCT t.STATUS) "States", '
        f'          COUNT(*) "Transitions", '
        f'          ("Transitions" / "States") - 1 "DegreeOfCycling", '
        f'          AVG(TIMEDELTA) / (60 * 60 * 24) AVG_DAYS, '
        f'          MAX(TIMEDELTA) / (60 * 60 * 24) MAX_DAYS, '
        f'          MIN(TIMEDELTA) / (60 * 60 * 24) MIN_DAYS, '
        f'          AVG(TIMEDELTA) / (60 * 60) AVG_HOUR, '
        f'          MAX(TIMEDELTA) / (60 * 60) MAX_HOURS, '
        f'          MIN(TIMEDELTA) / (60 * 60) MIN_HOURS '
        f'      FROM TIMELINES t '
        f'          INNER JOIN ISSUES i ON t.KEY = i.KEY '
        f'      WHERE t.KEY IN '
        f'          (SELECT DISTINCT KEY FROM CHANGELOGS WHERE DATECREATED >= {interval.fromDate()} AND DATECREATED < {interval.toDate()}) '
        f'          AND {where_resolution} '
        f'      GROUP BY 1, 2, 3, 4) '
        f'WHERE '
        f'    AVG_HOUR > 2 '
        f'    AND {max_duration} '
        f'ORDER BY 1; '
    )
    # print(sql)
    df = sw.fetch_df(sql)
    df["AVG_HOUR"] = df["AVG_HOUR"].map(lambda x: np.nan if x is None else float(x))
    df["AVG_DAYS"] = df["AVG_DAYS"].map(lambda x: np.nan if x is None else float(x))
    df.insert(2, "MERGED_STATUS", map_statuses(df["STATUS"]))
    return df
def transition_frequency(sw: SnowflakeWrapper, interval: Interval, limit=10, order="DESC", by_status=True,
                         by_week=False, by_issue_type=False, by_issue_priority=False,
                         get_sql=False) -> Union[pd.DataFrame, str]:
    limit_query = f"LIMIT {limit}" if 0 < limit < 100 else ""
    breakdown_by_issue_type = 'I.FIELDS:issuetype:name::string "IssueType",' if by_issue_type else ""
    breakdown_by_issue_priority = 'I.FIELDS:priority:name::string "IssuePriority",' if by_issue_priority else ""
    breakdown_by_week = 'YEAR "Year", IFF(MONTH = 12 AND WEEKOFYEAR = 1, WEEKOFYEAR + 52, WEEKOFYEAR) "WeekOfYear",' if by_week else ""
    breakdown_by_status = 'CHANGELOGITEM:fromString::string || \' -> \' || CHANGELOGITEM:toString::string "Transition",' if by_status else ""
    join_issues = "INNER JOIN ISSUES I on C.KEY = I.KEY " if by_issue_type or by_issue_priority else ""
    # Number of projected breakdown columns; the week breakdown contributes two (year and week).
    query_group_by_props = (2 if by_week else 0) + (1 if by_status else 0) \
        + (1 if by_issue_type else 0) + (1 if by_issue_priority else 0)
    group_by = f"GROUP BY {','.join([str(i + 1) for i in range(query_group_by_props)])}" if query_group_by_props > 0 else ""
    query_order = order if order.upper() in ["ASC", "DESC"] else "DESC"
    if not by_week:
        order_by = f"ORDER BY {query_group_by_props + 1} {query_order}"
    else:
        order_by = f"ORDER BY {','.join([str(i + 1) for i in range(query_group_by_props + 1)])} {query_order}"
    # TODO: add a join to a weeks table, which will yield NULLs wherever there are missing values;
    #  later on, the NULLs will be converted to zeros (0).
    sql = (
        f'SELECT '
        f'    {breakdown_by_week} '
        f'    {breakdown_by_issue_type} '
        f'    {breakdown_by_issue_priority} '
        f'    {breakdown_by_status} '
        f'    COUNT(*) "TotalTransitions" '
        f'FROM CHANGELOGS C '
        f'    {join_issues} '
        f'WHERE '
        f'    C.CHANGELOGITEM:field::string = \'status\' '
        f'    AND C.DATECREATED >= {interval.fromDate()} AND C.DATECREATED < {interval.toDate()} '
        f'{group_by} '
        f'{order_by} '
        f'{limit_query}; '
    )
    # print(sql)
    if get_sql:
        return sql
    return sw.fetch_df(sql)
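# Example call (a sketch; assumes an initialized `sw`): weekly counts per status
# transition. With by_week=True and the default by_status=True, the generated
# query groups by three columns ("Year", "WeekOfYear", "Transition") and orders
# by all of them plus "TotalTransitions"; limit=0 disables the LIMIT clause.
#
#     weekly = transition_frequency(sw, interval, limit=0, by_week=True)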
def get_avg_timeline(sw: SnowflakeWrapper, interval: Interval) -> pd.DataFrame:
    query = (
        f'SELECT '
        f'    STATUS "Status", '
        f'    COUNT(DISTINCT KEY) "UniqueIssues", '
        f'    COUNT(*) "Issues", '
        f'    "Issues" - "UniqueIssues" "Reassignments", '
        f'    AVG(TIMEDELTA) / (60 * 60 * 24) "AvgDays", '
        f'    MAX(TIMEDELTA) / (60 * 60 * 24) "MaxDays", '
        f'    MIN(TIMEDELTA) / (60 * 60 * 24) "MinDays" '
        f'FROM TIMELINES t '
        f'WHERE '
        f'    t.DATEFROM >= {interval.fromDate()} '
        f'    AND t.DATETO < {interval.toDate()} '
        f'GROUP BY 1 '
        f'ORDER BY 1, 4 DESC; '
    )
    print(query)
    return sw.fetch_df(query)
def work_activity_on_interval(
        sw: SnowflakeWrapper,
        interval: Interval,
        ticket_keys: Union[None, list] = None) -> pd.DataFrame:
    """
    Returns the work activity (changes of assignees and statuses) on a given interval.

    To be able to infer the state of an issue at intervalStartDate, the activity
    is provided on the interval [issueCreationDate, intervalEndDate).

    :param sw: SnowflakeWrapper
    :param interval: (from:date, to:date)
    :param ticket_keys: custom list of ticket keys
    :return:
    """
    if ticket_keys is None:
        ticket_keys = tickets_active_on_interval(sw, interval)
    ids = f" c.KEY IN ({utils.mask_in(ticket_keys)}) AND "
    if interval or ids:  # NOTE: always true; `ids` is built but not yet referenced in the SQL below
        print("WARNING: Given interval and/or ids are being ignored at the moment.")
    sql = (
        f"SELECT "
        f"    c.KEY, "
        f"    i.{utils.decode_user('fields', 'reporter')} reporter, "
        f"    TO_TIMESTAMP_NTZ(convert_timezone('UTC', to_timestamp_tz(i.{utils.decode_field('fields', 'created')}::string, 'YYYY-MM-DD\"T\"HH24:MI:SS.FF TZHTZM'))) dateCreated, "
        f"    ARRAY_AGG( "
        f"        OBJECT_CONSTRUCT( "
        f"            'author', USERID, "
        f"            'dateCreated', c.DATECREATED, "
        f"            'changelogItems', ARRAY_CONSTRUCT(CHANGELOGITEM) "
        f"        ) "
        f"    ) CHANGELOGITEMS "
        f"FROM CHANGELOGS c INNER JOIN ISSUES i ON c.KEY = i.KEY "
        f"WHERE "
        f"    c.changelogItem:field IN ('status', 'assignee') AND "
        f"    c.DATECREATED < {interval.toDate()} "
        f"GROUP BY 1, 2, 3 "
    )
    print(f"Executing: {sql}")
    changelog = sw.fetch_df(sql)
    changelog['CHANGELOGITEMS'] = changelog['CHANGELOGITEMS'].apply(
        lambda x: sort_and_merge(json.loads(x, object_pairs_hook=load_with_datetime)))
    return changelog
def get_aggregated_authored_activity(sw: SnowflakeWrapper, interval: Interval, user_id: Union[None, list] = None):
    ids = "" if user_id is None else f" USERID IN ({mask_in(user_id)}) AND"
    result = sw.fetch_df(
        f"SELECT "
        f"    USERID, "
        f"    ARRAY_AGG( "
        f"        OBJECT_CONSTRUCT( "
        f"            'userId', USERID, "
        f"            'field', FIELD, "
        f"            'count', CNT "
        f"        ) "
        f"    ) ACTIVITY "
        f"FROM ( "
        f"    SELECT "
        f"        activity.USERID, "
        f"        item.VALUE:field::string FIELD, "
        f"        COUNT(*) CNT "
        f"    FROM "
        f"        (SELECT "
        f"            USERID, "
        f"            ARRAY_AGG(CHANGELOGITEM) CHANGELOGITEMS "
        f"        FROM CHANGELOGS "
        f"        WHERE "
        f"            {ids} "
        f"            DATECREATED >= {interval.fromDate()} "
        f"            AND DATECREATED < {interval.toDate()} "
        f"        GROUP BY USERID) activity, "
        f"        lateral flatten(activity.CHANGELOGITEMS) item "
        f"    GROUP BY 1, 2 "
        f") "
        f"GROUP BY 1 "
        f"ORDER BY 1; "
    )
    # TODO: add a breakdown by issue type.
    return pd.concat(
        result['ACTIVITY'].apply(
            lambda x: pd.DataFrame(json.loads(x))
            .pivot(index='userId', columns='field', values='count')).tolist(),
        sort=True)
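# The concat/pivot step above turns each user's JSON activity array into one row
# per user with a column per changed field. A small self-contained illustration
# of that reshaping (the sample payload is made up):
#
#     import json
#     import pandas as pd
#
#     raw = ('[{"userId": "u1", "field": "status", "count": 3}, '
#            '{"userId": "u1", "field": "assignee", "count": 1}]')
#     pd.DataFrame(json.loads(raw)).pivot(index='userId', columns='field', values='count')
#     # field   assignee  status
#     # userId
#     # u1             1       3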
def get_developers(
        sw: SnowflakeWrapper,
        interval: Interval,
        user_filters: Union[dict, None] = None,
        break_by: Union[list, None] = None,
        max_duration: int = 30,
        debug: bool = False,
) -> DataFrame:
    if user_filters is None:
        user_filters = {"status": "dev"}
    if break_by is None:
        break_by = ["assignee"]
    else:
        break_by = list(set(["assignee"] + break_by))
    dimensions = {
        "type": ' i.FIELDS:issuetype:name::string "IssueType" ',
        "priority": ' i.FIELDS:priority:name::string "IssuePriority" ',
        "status": " STATUS ",
        "assignee": " ASSIGNEE ",
    }
    statuses = ','.join([f"'{s}'" for s in expand_statuses(user_filters["status"])]) \
        if "status" in user_filters and user_filters["status"] else None
    filters = {
        "status": f"STATUS IN ({statuses})" if statuses else " 1 = 1 ",
        "priority": f'i.FIELDS:priority:name::string IN (\'{user_filters["priority"]}\')'
                    if "priority" in user_filters and user_filters["priority"] else " 1 = 1 ",
        "type": f'i.FIELDS:issuetype:name::string IN (\'{user_filters["type"]}\')'
                if "type" in user_filters and user_filters["type"] else " 1 = 1 ",
    }
    sql_filters = ' AND '.join(filters.values())
    dims = ','.join([dimensions[dim] for dim in break_by])
    group_by = ','.join([str(i + 1) for i, _ in enumerate(break_by)])
    # Derive the SQL duration predicate from the numeric argument (capped at 30 days);
    # a separate name avoids rebinding the `max_duration` parameter to a string.
    duration_filter = f" AVG_DAYS <= {min(30, max_duration)} " if max_duration > 1 else " AVG_HOURS < 10 "
    sql = (
        f'SELECT * '
        f'FROM ( '
        f'    SELECT '
        f'        {dims}, '
        f'        COUNT(DISTINCT t.KEY) "UniqueIssues", '
        f'        COUNT(t.KEY) "Issues", '
        f'        ("Issues" / "UniqueIssues") - 1 "DegreeOfCycling", '
        f'        AVG(TIMEDELTA) / (60 * 60 * 24) AVG_DAYS, '
        f'        MAX(TIMEDELTA) / (60 * 60 * 24) MAX_DAYS, '
        f'        MIN(TIMEDELTA) / (60 * 60 * 24) MIN_DAYS, '
        f'        AVG(TIMEDELTA) / (60 * 60) AVG_HOURS, '
        f'        MAX(TIMEDELTA) / (60 * 60) MAX_HOURS, '
        f'        MIN(TIMEDELTA) / (60 * 60) MIN_HOURS '
        f'    FROM TIMELINES t INNER JOIN ISSUES i ON t.KEY = i.KEY '
        f'    WHERE DATEFROM >= {interval.fromDate()} '
        f'        AND (DATETO < {interval.toDate()} OR DATETO IS NULL) '
        f'        AND ASSIGNEE IS NOT NULL '
        f'        AND {sql_filters} '
        f'    GROUP BY {group_by} '
        f') '
        f'WHERE AVG_HOURS > 2 '
        f'    AND {duration_filter} '
        f'ORDER BY 1 '
    )
    if debug:
        print(sql)
    df = sw.fetch_df(sql)
    df["AVG_DAYS"] = df["AVG_DAYS"].map(lambda x: np.nan if x is None else float(x))
    df["AVG_HOURS"] = df["AVG_HOURS"].map(lambda x: np.nan if x is None else float(x))
    if "status" in break_by:
        df.insert(2, "MERGED_STATUS", map_statuses(df["STATUS"]))
    return df
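# Example call (a sketch; assumes an initialized `sw` and an `interval` covering
# the reporting period): per-status averages for developers on 'Bug' tickets.
# "assignee" is always added to `break_by`, so this groups by assignee and status.
#
#     devs = get_developers(sw, interval,
#                           user_filters={"status": "dev", "type": "Bug"},
#                           break_by=["status"])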
def get_ticket_types(sw: SnowflakeWrapper) -> list:
    return sw.fetch_df(
        'SELECT DISTINCT i.FIELDS:issuetype:name::string "IssueType" FROM ISSUES i;'
    )["IssueType"].to_list()
def get_ticket_priorities(sw: SnowflakeWrapper) -> list:
    return sw.fetch_df(
        'SELECT DISTINCT i.FIELDS:priority:name::string "IssuePriority" FROM ISSUES i;'
    )["IssuePriority"].to_list()
def get_distinct_statuses(sw: SnowflakeWrapper) -> list:
    return sw.fetch_df(
        "SELECT ID "
        "FROM STATUSES "
        "ORDER BY 1;"
    )['ID'].tolist()