def handler(event, context):
    try:
        lambda_client = boto3.client('lambda')
        ENV = os.environ['ENV']
        if event.get('queryStringParameters'):
            print("Invoke issue type etl in {}".format(ENV))
            function_name = "vger-sls-jira-issue-type-etl-{}".format(ENV)
            lambda_client.invoke(FunctionName=function_name, InvocationType="Event")
            payload = {"message": "Successfully invoked issue type ETL"}
        else:
            lambda_preprocessor = VgerRedshiftPreprocessor(event)
            lambda_preprocessor.verify_project_id()
            lambda_preprocessor.validate_project_id()
            project_id = lambda_preprocessor.param["project_id"]
            print("Invoke etl for project {} in {}".format(project_id, ENV))
            function_name = "vger-sls-jira-etl-{}".format(ENV)
            json_payload = json.dumps({"id": project_id})
            lambda_client.invoke(FunctionName=function_name, InvocationType="Event", Payload=json_payload)
            payload = {"message": "Successfully invoked JIRA ETL for project {}".format(project_id)}
        response = response_formatter(status_code='200', body=payload)
    except LambdaPreprocessorError as e:
        response = e.args[0]
    except Exception as e:
        payload = {'message': 'Internal error: {}'.format(e)}
        response = response_formatter(status_code='500', body=payload)
    return response
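# response_formatter is imported from a shared helper module not shown in this
# excerpt. The handlers pass status_code as a string or int and body as a dict,
# which suggests an API Gateway Lambda proxy response. A minimal sketch of that
# shape; the default status code and header set here are assumptions, not the
# repo's actual implementation:
import json

def response_formatter(status_code='400', body=None):
    # API Gateway proxy integrations expect statusCode plus a JSON string body
    return {
        'statusCode': str(status_code),
        'body': json.dumps(body if body is not None else {}),
        'headers': {'Content-Type': 'application/json'}
    }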
def validate_jira_board_name(self):
    try:
        if self.param.get("board_name"):
            encoded_board_name = quote(self.param["board_name"], safe='')
            # Jira only allows querying for board names that *contain* a string,
            # which may result in multiple boards being returned
            JIRA_BOARD_API = web_api_constants.BOARD_NAME_URL.format(self.jira_config["JIRA_URL"], encoded_board_name)
            content = requests.get(JIRA_BOARD_API,
                                   auth=(self.jira_config["JIRA_USER"], self.jira_config["JIRA_PASS"])).json()
            boards = content['values']
            if boards:
                for board in boards:
                    if self.param["board_name"] == board["name"]:
                        self.param["board_id"] = board["id"]
                if not self.param.get("board_id"):
                    payload = {'message': 'Did you mean one of the following boards: {}'.format(
                        ",".join([board["name"] for board in boards]))}
                    return response_formatter(status_code='400', body=payload)
            else:
                payload = {'message': 'Cannot find board {} in JIRA'.format(self.param["board_name"])}
                return response_formatter(status_code='404', body=payload)
        else:
            payload = {'message': 'No board name can be found in the query parameters.'}
            return response_formatter(status_code='404', body=payload)
    except Exception as e:
        payload = {'message': 'Internal Error: {}'.format(e)}
        return response_formatter(status_code='500', body=payload)
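# Why the exact-match loop above is needed: the board-name endpoint matches
# substrings, so one query can return several boards. A hypothetical response
# for a search on "Team Alpha" (sample data, not from a real server):
sample_content = {
    "values": [
        {"id": 101, "name": "Team Alpha"},
        {"id": 102, "name": "Team Alpha Backlog"}
    ]
}
# Only the exact name should resolve to a board_id
matches = [b["id"] for b in sample_content["values"] if b["name"] == "Team Alpha"]
assert matches == [101]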
def handler(event, context):
    try:
        lambda_preprocessor = VgerGitPRPreprocessor(event)
        # General validation and generation
        lambda_preprocessor.verify_project_id()
        lambda_preprocessor.validate_project_id()
        lambda_preprocessor.generate_query_parameters(category="repo")
        lambda_preprocessor.verify_project_repo()
        lambda_preprocessor.generate_rolling_window_weeks()
        lambda_preprocessor.generate_time_interval_date(trace_back=True)

        rolling_window_weeks = lambda_preprocessor.param["rolling_window_weeks"]
        pr_counts, total_weeks_list = lambda_preprocessor.get_merged_pull_requests()
        num_merged_pull_requests = [data[0] for data in pr_counts]

        rolling_weeks_used = []
        coefficient_of_variation = []
        # For all the weeks in rollingWeeks, perform the throughput calculations,
        # moving the window forward one week each time
        for index in range(len(total_weeks_list)):
            if index + rolling_window_weeks >= len(total_weeks_list):
                break
            closed_pull_requests_subset = num_merged_pull_requests[index:index + rolling_window_weeks]
            std = numpy.std(closed_pull_requests_subset)
            mean = numpy.mean(closed_pull_requests_subset)
            if mean == 0:
                coefficient_of_variation.append(0)
            else:
                coefficient_of_variation.append(std / mean)
            week = pytz.utc.localize(total_weeks_list[index + rolling_window_weeks]).isoformat()
            rolling_weeks_used.append(week)

        payload = zip(rolling_weeks_used, coefficient_of_variation)
        response = response_formatter(status_code='200', body=payload)
    except LambdaPreprocessorError as e:
        response = e.args[0]
    except Exception as e:
        payload = {'message': 'Internal error: {}'.format(e)}
        response = response_formatter(status_code='500', body=payload)
    return response
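# A quick worked example of the rolling coefficient-of-variation loop above,
# with made-up weekly merge counts (illustrative only, not real project data):
import numpy

counts = [4, 7, 5, 0, 6, 8]   # hypothetical merged-PR counts per week
window = 3                    # hypothetical rolling_window_weeks
for i in range(len(counts) - window):
    subset = counts[i:i + window]
    mean = numpy.mean(subset)
    cv = 0 if mean == 0 else numpy.std(subset) / mean
    print("{} -> {}".format(subset, round(float(cv), 3)))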
def generate_query_parameters(self, category=""): try: if category == "board_name": self.param["board_name"] = self.event.get("queryStringParameters").get("boardName") except Exception as e: payload = {'message': 'Invalid query parameters: {0}'.format(e)} return response_formatter(status_code='404', body=payload)
def verify_project_id(self):
    try:
        self.param["project_id"] = self.event.get('pathParameters').get('id')
    except Exception as e:
        payload = {'message': 'Missing Attribute in path parameters: {}'.format(e)}
        # status_code='400' is assumed here; the original call omitted it,
        # presumably relying on response_formatter's default
        return response_formatter(status_code='400', body=payload)
def validate_project_id(self):
    try:
        self.redshift.validateProjectID(self.param["project_id"])
    except Exception as e:
        payload = {'message': 'Project with id={0} cannot be found: {1}'.format(self.param["project_id"], e)}
        return response_formatter(status_code='404', body=payload)
def handler(event, context):
    try:
        jira_preprocessor = VgerJiraPreprocessor(event)
        jira_preprocessor.generate_query_parameters(category="board_name")
        jira_preprocessor.validate_jira_board_name()
        issue_filter = jira_preprocessor.get_board_jql()
        payload = {'issue_filter': issue_filter}
        response = response_formatter(status_code='200', body=payload)
    except LambdaPreprocessorError as e:
        response = e.args[0]
    except Exception as e:
        payload = {'message': 'Internal error: {}'.format(e)}
        response = response_formatter(status_code='500', body=payload)
    return response
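# get_board_jql is implemented elsewhere in the repo. A hedged sketch of one
# way to resolve a board's saved-filter JQL through Jira's public REST API;
# the function name mirrors the call site above, but the endpoints, signature,
# and auth plumbing below are assumptions about the implementation:
import requests

def get_board_jql(jira_url, board_id, auth):
    # A board's configuration carries the id of its saved filter
    config = requests.get(
        "{}/rest/agile/1.0/board/{}/configuration".format(jira_url, board_id),
        auth=auth).json()
    filter_id = config["filter"]["id"]
    # The filter resource exposes the JQL string itself
    issue_filter = requests.get(
        "{}/rest/api/2/filter/{}".format(jira_url, filter_id),
        auth=auth).json()
    return issue_filter["jql"]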
def generate_rolling_window_weeks(self):
    try:
        rolling_window_days = self.redshift.selectRollingWindow(self.param["project_id"])
        self.param["rolling_window_weeks"] = rolling_window_days // 7
    except Exception as e:
        payload = {'message': 'Error on calculating rolling window weeks: {}'.format(e)}
        return response_formatter(status_code='500', body=payload)
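# Floor division keeps only full weeks, so partial weeks are dropped; a
# worked check of the conversion above:
assert 90 // 7 == 12   # a 90-day window yields 12 full weeks
assert 13 // 7 == 1    # 13 days still count as a single week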
def verify_project_repo(self):
    try:
        if self.param.get("repo_list"):
            db_repo = self.redshift.getRepos(self.param["project_id"])
            invalid_repo = [str(repo) for repo in self.param["repo_list"] if str(repo) not in db_repo]
            if invalid_repo:
                raise ValueError(invalid_repo)
    except Exception as e:
        payload = {'message': 'Invalid repository request: {}'.format(e)}
        return response_formatter(status_code='404', body=payload)
def generate_time_interval_date(self, trace_back=False):
    try:
        time_interval_calculator = TimeIntervalCalculator(
            self.param["query_date_until"], self.param["query_date_since"], self.param["days"])
        # Shift back one week to count PRs made in the week before the following
        # Monday; when tracing back, shift by the full rolling window instead
        time_interval_calculator.decrementDateSinceWeeks(
            self.param["rolling_window_weeks"] if trace_back else 1)
        self.param["date_since"] = time_interval_calculator.getDateSince()
        self.param["date_until"] = time_interval_calculator.getDateUntil()
    except ValueError as e:
        payload = {'message': 'Invalid date request: {}'.format(e)}
        return response_formatter(status_code='404', body=payload)
def handler(event, context):
    try:
        lambda_preprocessor = VgerRedshiftPreprocessor(event)
        lambda_preprocessor.verify_project_id()
        lambda_preprocessor.validate_project_id()

        query = "SELECT last_etl_run FROM team_project_etl WHERE team_project_id = %s"
        cursor = lambda_preprocessor.redshift.getCursor()
        cursor.execute(query, (lambda_preprocessor.param["project_id"],))
        result = cursor.fetchone()

        if result is None or result[0] is None:
            payload = {"last_etl_run": None}
        else:
            payload = {"last_etl_run": int(result[0])}
        response = response_formatter(status_code='200', body=payload)
    except LambdaPreprocessorError as e:
        response = e.args[0]
    except Exception as e:
        payload = {"message": "Internal Error: {}".format(e)}
        response = response_formatter(status_code="500", body=payload)
    return response
def generate_query_parameters(self, category="", time=True): try: query_param = QueryParameters(self.event) if time: self.param["days"] = query_param.getDays() self.param["query_date_until"] = query_param.getDate( 'dateUntil') self.param["query_date_since"] = query_param.getDate( 'dateSince') if category == "repo": self.param["repo_list"] = query_param.getRepoName().split( ',') if query_param.getRepoName() else [] except Exception as e: payload = {'message': 'Invalid query parameters: {0}'.format(e)} return response_formatter(status_code='404', body=payload)
def handler(event, context):
    try:
        lambda_preprocessor = VgerGitPRPreprocessor(event)
        # General validation and generation
        lambda_preprocessor.verify_project_id()
        lambda_preprocessor.validate_project_id()
        lambda_preprocessor.generate_query_parameters(category="repo")
        lambda_preprocessor.verify_project_repo()
        lambda_preprocessor.generate_rolling_window_weeks()
        lambda_preprocessor.generate_time_interval_date(trace_back=False)

        failed_pr_volumes, total_weeks_list = lambda_preprocessor.get_failed_pull_requests_volume()

        data = {}
        for week in total_weeks_list:
            data[str(week)] = {"Merged": 0, "Rejected": 0}

        start_time = total_weeks_list[0]
        end_time = total_weeks_list[-1]
        for result in failed_pr_volumes:
            pr_number, created_time, closed_time, volume, completed = result
            if created_time < start_time:
                week_start_time = start_time
            else:
                # Normalize to the Monday after the week the PR was created in
                week_start_date = created_time - datetime.timedelta(days=created_time.weekday())
                week_start_time = datetime.datetime(week_start_date.year, week_start_date.month,
                                                    week_start_date.day) + datetime.timedelta(weeks=1)
            if closed_time > end_time:
                week_end_time = end_time
            else:
                week_end_date = closed_time - datetime.timedelta(days=closed_time.weekday())
                week_end_time = datetime.datetime(week_end_date.year, week_end_date.month,
                                                  week_end_date.day) + datetime.timedelta(weeks=1)

            # Attribute the PR's volume to every week it was open
            while week_start_time <= week_end_time:
                if completed:
                    data[str(week_start_time)]["Merged"] += volume
                else:
                    data[str(week_start_time)]["Rejected"] += volume
                week_start_time += datetime.timedelta(weeks=1)

        payload = dict()
        payload["Rejected Volume"] = [[pytz.utc.localize(week).isoformat(), data[str(week)]["Rejected"]]
                                      for week in total_weeks_list]
        payload["Merged Volume"] = [[pytz.utc.localize(week).isoformat(), data[str(week)]["Merged"]]
                                    for week in total_weeks_list]
        response = response_formatter(status_code='200', body=payload)
    except LambdaPreprocessorError as e:
        response = e.args[0]
    except Exception as e:
        payload = {'message': 'Internal error: {}'.format(e)}
        response = response_formatter(status_code='500', body=payload)
    return response
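# The created/closed timestamps above are bucketed to week boundaries by
# subtracting the weekday and adding one week. A small stand-alone sketch of
# that step (week_bucket is a hypothetical name, not a repo helper):
import datetime

def week_bucket(ts):
    # Midnight on the Monday of ts's week, shifted forward one week to match
    # the "+ datetime.timedelta(weeks=1)" convention in the handler
    monday = ts - datetime.timedelta(days=ts.weekday())
    return datetime.datetime(monday.year, monday.month, monday.day) + datetime.timedelta(weeks=1)

# e.g. a PR created Wednesday 2018-01-10 lands in the week labeled 2018-01-15
assert week_bucket(datetime.datetime(2018, 1, 10, 14, 30)) == datetime.datetime(2018, 1, 15)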
def handler(event, context):
    # Verify project id from path parameters
    try:
        lambda_preprocessor = VgerGitPRPreprocessor(event)
        # General validation and generation
        lambda_preprocessor.verify_project_id()
        lambda_preprocessor.validate_project_id()
        lambda_preprocessor.generate_query_parameters(category="repo")
        lambda_preprocessor.verify_project_repo()
        lambda_preprocessor.generate_rolling_window_weeks()
        lambda_preprocessor.generate_time_interval_date(trace_back=False)

        pr_counts, total_weeks_list = lambda_preprocessor.get_merged_pull_requests()
        num_merged_pull_requests = [data[0] for data in pr_counts]
        weeks = [pytz.utc.localize(week).isoformat() for week in total_weeks_list[1:]]
        # list() keeps this appendable under Python 3 as well; under Python 2,
        # zip() already returns a list
        payload = list(zip(weeks, num_merged_pull_requests))

        # For straight-line percentile calculations
        print(num_merged_pull_requests)
        organizedTotals = sorted(num_merged_pull_requests)
        lengthOfDataSet = len(organizedTotals)

        # Calculate straight percentile values using the R-7 statistical method
        # https://en.wikipedia.org/wiki/Quantile (find: R-7)
        ninetiethPercentilesStraightPoint = R7PercentileCalculator(90.0, organizedTotals, lengthOfDataSet)
        eightiethPercentilesStraightPoint = R7PercentileCalculator(80.0, organizedTotals, lengthOfDataSet)
        fiftiethPercentilesStraightPoint = R7PercentileCalculator(50.0, organizedTotals, lengthOfDataSet)
        twentiethPercentilesStraightPoint = R7PercentileCalculator(20.0, organizedTotals, lengthOfDataSet)
        tenthPercentilesStraightPoint = R7PercentileCalculator(10.0, organizedTotals, lengthOfDataSet)

        # Make each "straight percentile" an array of values equal in length to the data set
        ninetiethPercentilesStraight = [ninetiethPercentilesStraightPoint] * lengthOfDataSet
        eightiethPercentilesStraight = [eightiethPercentilesStraightPoint] * lengthOfDataSet
        fiftiethPercentilesStraight = [fiftiethPercentilesStraightPoint] * lengthOfDataSet
        twentiethPercentilesStraight = [twentiethPercentilesStraightPoint] * lengthOfDataSet
        tenthPercentilesStraight = [tenthPercentilesStraightPoint] * lengthOfDataSet

        payload.append(["fiftiethStraight", fiftiethPercentilesStraight])
        payload.append(["eightiethStraight", eightiethPercentilesStraight])
        payload.append(["ninetiethStraight", ninetiethPercentilesStraight])
        payload.append(["twentiethStraight", twentiethPercentilesStraight])
        payload.append(["tenthStraight", tenthPercentilesStraight])
        print(payload)

        response = response_formatter(status_code='200', body=payload)
    except LambdaPreprocessorError as e:
        response = e.args[0]
    except Exception as e:
        payload = {'message': 'Internal error: {}'.format(e)}
        response = response_formatter(status_code='500', body=payload)
    # Lambda must return a response
    return response
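# R7PercentileCalculator lives elsewhere in the repo. A minimal sketch of the
# R-7 estimator it references (https://en.wikipedia.org/wiki/Quantile), which
# is also numpy.percentile's default linear interpolation; the signature
# mirrors the call sites above but is an assumption:
def R7PercentileCalculator(percentile, sorted_values, length):
    if length == 0:
        return 0
    # Fractional (0-indexed) rank for the R-7 method
    h = (length - 1) * (percentile / 100.0)
    lower = int(h)
    upper = min(lower + 1, length - 1)
    # Linear interpolation between the two bracketing order statistics
    return sorted_values[lower] + (h - lower) * (sorted_values[upper] - sorted_values[lower])

# e.g. the 50th percentile of [1, 2, 3, 4] interpolates to 2.5
assert R7PercentileCalculator(50.0, [1, 2, 3, 4], 4) == 2.5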
def handler(event, context):
    # Grab the data passed to the lambda function through the browser URL (API Gateway)
    try:
        projectID = event.get('pathParameters').get('id')
    except Exception as e:
        payload = {"message": "Id path parameter not given: {}".format(e)}
        return response_formatter(status_code=404, body=payload)

    redshift = RedshiftConnection()
    try:
        redshift.validateProjectID(projectID)
    except Exception as e:
        redshift.closeConnection()
        payload = {"message": "No resource with project ID {} found: {}".format(projectID, e)}
        return response_formatter(status_code=404, body=payload)

    try:
        # Grab the query string parameters of offset (days), dateUntil, dateSince, and workTypes
        queryParameters = QueryParameters(event)
        days = queryParameters.getDays()
        dateUntilParameter = queryParameters.getDate('dateUntil')
        dateSinceParameter = queryParameters.getDate('dateSince')
        workTypes = queryParameters.getWorktypes()
        workTypeParser = WorkTypeParser(workTypes, projectID)
        workTypeParser.validateWorkTypes(redshift.getCursor(), redshift.getConn())
        rollingWindowDays = redshift.selectRollingWindow(projectID)
        # Convert rollingWindowDays to rollingWindowWeeks
        rollingWindowWeeks = int(math.floor(rollingWindowDays / 7.0))
        timeInterval = TimeIntervalCalculator(dateUntilParameter, dateSinceParameter, days)
        timeInterval.decrementDateSinceWeeks(rollingWindowWeeks)
    except ValueError as err:
        redshift.closeConnection()
        payload = {"message": "{}".format(err)}
        return response_formatter(status_code=400, body=payload)

    # Get the actual start and end date after adding rolling weeks, in epoch format
    dateSince = timeInterval.getDateSinceInt()
    dateUntil = timeInterval.getDateUntilInt()

    # Generate list of weeks: insert every Monday up to dateUntil for label purposes
    endDate = dateUntil
    startDate = dateSince
    rollingWeeks = [startDate]
    secsPerWeek = 604800
    while startDate < endDate:
        startDate += secsPerWeek
        rollingWeeks.append(startDate)

    # Init redshift connection
    connection_detail = {
        'dbname': os.environ['DATABASE_NAME'],
        'host': os.environ["CLUSTER_ENDPOINT"],
        'port': os.environ['REDSHIFT_PORT'],
        'user': os.environ['AWS_RS_USER'],
        'password': os.environ['AWS_RS_PASS']
    }
    conn = psycopg2.connect(**connection_detail)

    # Get the sequence numbers for the start and end states of the current project
    default_state_query = """
        SELECT seq_number
        FROM team_project, team_work_states
        WHERE team_project.id = %s
          AND team_work_states.team_project_id = %s
          AND (team_work_states.state_name = team_project.default_lead_time_start_state
               OR team_work_states.state_name = team_project.default_lead_time_end_state)
        ORDER BY seq_number
    """
    with conn:
        with conn.cursor() as cur:
            cur.execute(default_state_query, (projectID, projectID))
            default_state_results = cur.fetchall()
    start_state_seq = default_state_results[0][0]
    end_state_seq = default_state_results[1][0]

    # Get all work states for the current project and build a dict for lead time calculations
    work_state_query = """
        SELECT state_name, seq_number
        FROM team_work_states
        WHERE team_project_id = %s
        ORDER BY seq_number
    """
    with conn:
        with conn.cursor() as cur:
            cur.execute(work_state_query, (projectID,))
            work_states_results = cur.fetchall()
    lead_time_states = [work_state for work_state, work_seq in work_states_results
                        if start_state_seq <= work_seq < end_state_seq]
    work_states_dict = {work_seq: work_state for work_state, work_seq in work_states_results}

    # Filter out invalid issue types and resolutions
    issueTypesList = workTypeParser.issueTypesList
    invalidResolutionsList = workTypeParser.invalidResolutionsList

    # Init rolling interval dict for each week for output purposes
    rolling_intervals = {}
    for index in range(len(rollingWeeks)):
        if index + rollingWindowWeeks >= len(rollingWeeks):
            # Avoids indexing out of range
            break
        week = datetime.datetime.fromtimestamp(rollingWeeks[index + rollingWindowWeeks], tz=pytz.utc).isoformat()
        rolling_intervals[week] = {
            "rolling_interval_start": rollingWeeks[index],
            "rolling_interval_end": rollingWeeks[index + rollingWindowWeeks],
            "leadtime": {"Overall": []}
        }
        for state in lead_time_states:
            rolling_intervals[week]["leadtime"][state] = []

    # Get all issues within the time range along with their work-state change history
    issue_query = """
        SELECT issue_key,
               listagg(CASE WHEN s1.seq_number IS NULL THEN -1 ELSE s1.seq_number END, ',')
                   within group(ORDER BY issue_change.changed) AS prev_number_seq,
               listagg(CASE WHEN s2.seq_number IS NULL THEN -1 ELSE s2.seq_number END, ',')
                   within group(ORDER BY issue_change.changed) AS new_number_seq,
               listagg(issue_change.changed, ',')
                   within group(ORDER BY issue_change.changed) AS changed_seq
        FROM issue_change
        LEFT JOIN (SELECT team_status_states.team_project_id,
                          team_status_states.status,
                          team_status_states.state_name,
                          team_work_states.seq_number
                   FROM team_status_states
                   LEFT JOIN team_work_states
                     ON team_status_states.team_project_id = team_work_states.team_project_id
                    AND team_status_states.state_name = team_work_states.state_name) s1
          ON s1.team_project_id = issue_change.team_project_id AND s1.status = issue_change.prev_value
        LEFT JOIN (SELECT team_status_states.team_project_id,
                          team_status_states.status,
                          team_status_states.state_name,
                          team_work_states.seq_number
                   FROM team_status_states
                   LEFT JOIN team_work_states
                     ON team_status_states.team_project_id = team_work_states.team_project_id
                    AND team_status_states.state_name = team_work_states.state_name) s2
          ON s2.team_project_id = issue_change.team_project_id AND s2.status = issue_change.new_value
        WHERE issue_change.team_project_id = %s
          AND field_name = 'Status'
          AND (%s = 0 OR issue_type IN %s)
          AND (%s = 0 OR resolution NOT IN %s)
        GROUP BY issue_key
    """
    with conn:
        with conn.cursor() as cur:
            cur.execute(issue_query,
                        (projectID,
                         1 if issueTypesList else 0,
                         tuple(issueTypesList) if issueTypesList else (None,),
                         1 if invalidResolutionsList else 0,
                         tuple(invalidResolutionsList) if invalidResolutionsList else (None,)))
            results = cur.fetchall()

    # Convert results to dict format
    issues = [{"issue_name": result[0],
               "raw_info": zip(result[1].split(","), result[2].split(","), result[3].split(",")),
               "latest_seq": int(result[2].split(",")[-1])}
              for result in results]

    # If the latest/current status is not after the lead time end state, the issue is not done
    # and should be filtered out. This keeps only finished issues in the result set, meaning
    # every remaining issue will have all work-time states and be a finished issue
    issues = [issue for issue in issues if issue["latest_seq"] >= end_state_seq]

    # Still need to filter out issues that were closed before dateSince or after dateUntil
    counter = 0
    issuesToDelete = []
    # Since popping shifts the indices, each recorded index must be offset by the
    # number of pops that will happen before it
    numOfPops = 0
    for issue in issues:
        isIssueDeleted = False
        # Init lead time dictionary
        issue["leadtime"] = {state: 0 for state in lead_time_states}

        # Find the first transition into a lead time state from a pre-lead-time state
        for info in issue["raw_info"]:
            prev_seq_number = int(info[0])
            next_seq_number = int(info[1])
            state_transition_time = int(info[2])
            if prev_seq_number < start_state_seq <= next_seq_number < end_state_seq:
                issue["start_state_time"] = state_transition_time
                break

        # Find the last transition into a post-lead-time state from a lead time state
        for info in reversed(issue["raw_info"]):
            prev_seq_number = int(info[0])
            next_seq_number = int(info[1])
            state_transition_time = int(info[2])
            if start_state_seq <= prev_seq_number < end_state_seq <= next_seq_number:
                issue["end_state_time"] = state_transition_time
                break

        # If the issue was completed before or after the requested time window, remove it
        if ("end_state_time" in issue) and \
                (issue["end_state_time"] < int(dateSince) or issue["end_state_time"] > int(dateUntil)) and \
                isIssueDeleted == False:
            issuesToDelete.append(counter - numOfPops)
            numOfPops = numOfPops + 1
            isIssueDeleted = True

        # Calculate overall lead time
        if issue.get("start_state_time") and issue.get("end_state_time"):
            start_time = datetime.datetime.fromtimestamp(issue["start_state_time"])
            end_time = datetime.datetime.fromtimestamp(issue["end_state_time"])
            issue_working_days = TimeIntervalCalculator.workday_diff(start_time, end_time)
            issue["leadtime"]["Overall"] = float("{0:.2f}".format(issue_working_days))
        # If the needed parameters don't exist, remove the issue
        elif isIssueDeleted == False:
            issuesToDelete.append(counter - numOfPops)
            numOfPops = numOfPops + 1
            isIssueDeleted = True

        # Remove the issue if its lead time is under 15 minutes (0.01 working days)
        # to prevent it from being displayed on the chart as 0
        if ("Overall" in issue["leadtime"]) and issue["leadtime"]["Overall"] < 0.01 and isIssueDeleted == False:
            issuesToDelete.append(counter - numOfPops)
            numOfPops = numOfPops + 1
            isIssueDeleted = True

        counter = counter + 1

    # Filter out issues that did not finish during the time period
    for num in issuesToDelete:
        issues.pop(num)

    for issue in issues:
        # Calculate lead time for each work state
        state_transition_time = -1
        # Loop through the state change history and accumulate lead time for all states
        for info in issue["raw_info"]:
            prev_work_state = work_states_dict.get(int(info[0]))
            new_state_transition_time = int(info[2])
            if prev_work_state in lead_time_states and state_transition_time > 0:
                start_time = datetime.datetime.fromtimestamp(state_transition_time)
                end_time = datetime.datetime.fromtimestamp(new_state_transition_time)
                issue_working_days = TimeIntervalCalculator.workday_diff(start_time, end_time)
                issue["leadtime"][prev_work_state] += issue_working_days
            # Update for looping purposes
            state_transition_time = new_state_transition_time

        # Insert issue lead time into all matching intervals in ascending order
        # for the percentile calculation
        for key, value in rolling_intervals.iteritems():
            if (value["rolling_interval_start"] < issue["start_state_time"] < value["rolling_interval_end"]
                    and value["rolling_interval_start"] < issue["end_state_time"] < value["rolling_interval_end"]):
                for state, leadtime in issue["leadtime"].iteritems():
                    insort(value["leadtime"][state], leadtime)

    # Init output
    payload = {"fiftieth": {}, "eightieth": {}, "ninetieth": {}}
    for percentile, content in payload.iteritems():
        for state in lead_time_states:
            content[state] = []
        content["Overall"] = []

    # Generate output
    for key, value in rolling_intervals.iteritems():
        for state, leadtime in value["leadtime"].iteritems():
            payload["fiftieth"][state].append((key, percentile_calculation(0.5, leadtime)))
            payload["eightieth"][state].append((key, percentile_calculation(0.8, leadtime)))
            payload["ninetieth"][state].append((key, percentile_calculation(0.9, leadtime)))

    # Rearrange output in chronological order
    for percentile_res, content in payload.iteritems():
        for state, leadtimes in content.iteritems():
            leadtimes.sort(key=itemgetter(0))

    return response_formatter(status_code=200, body=payload)
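# TimeIntervalCalculator.workday_diff is defined elsewhere. Its results feed
# the 0.01-working-day (~15 minute) threshold above, so it evidently returns
# fractional working days. A coarse, hedged stand-in that only counts whole
# weekdays via numpy's business-day calendar; the real helper's handling of
# partial days and holidays is unknown:
import numpy

def workday_diff(start_time, end_time):
    # Whole weekdays between the two datetimes (dates only, fractions dropped)
    return float(numpy.busday_count(start_time.date(), end_time.date()))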
def handler(event, context):
    # Grab the data passed to the lambda function through the browser URL (API Gateway)
    try:
        projectID = event.get('pathParameters').get('id')
    except Exception as e:
        payload = {"message": "Id path parameter not given: {}".format(e)}
        return response_formatter(status_code=404, body=payload)

    redshift = RedshiftConnection()
    try:
        redshift.validateProjectID(projectID)
    except Exception as e:
        redshift.closeConnection()
        payload = {"message": "No resource with project ID {} found: {}".format(projectID, e)}
        return response_formatter(status_code=404, body=payload)

    try:
        # Grab the query string parameters of quarterDates and workTypes
        queryParameters = QueryParameters(event)
        quarters = queryParameters.getQuarterDates().split(',')
        workTypes = queryParameters.getWorktypes()
        workTypeParser = WorkTypeParser(workTypes, projectID)
        workTypeParser.validateWorkTypes(redshift.getCursor(), redshift.getConn())
    except ValueError as err:
        redshift.closeConnection()
        payload = {"message": "{}".format(err)}
        return response_formatter(status_code=400, body=payload)

    # Init redshift connection
    connection_detail = {
        'dbname': os.environ['DATABASE_NAME'],
        'host': os.environ["CLUSTER_ENDPOINT"],
        'port': os.environ['REDSHIFT_PORT'],
        'user': os.environ['AWS_RS_USER'],
        'password': os.environ['AWS_RS_PASS']
    }
    conn = psycopg2.connect(**connection_detail)

    # Get the sequence numbers for the start and end states of the current project
    default_state_query = """
        SELECT seq_number
        FROM team_project, team_work_states
        WHERE team_project.id = %s
          AND team_work_states.team_project_id = %s
          AND (team_work_states.state_name = team_project.default_lead_time_start_state
               OR team_work_states.state_name = team_project.default_lead_time_end_state)
        ORDER BY seq_number
    """
    with conn:
        with conn.cursor() as cur:
            cur.execute(default_state_query, (projectID, projectID))
            default_state_results = cur.fetchall()
    start_state_seq = default_state_results[0][0]
    end_state_seq = default_state_results[1][0]

    # Get all work states for the current project and build a dict for lead time calculations
    work_state_query = """
        SELECT state_name, seq_number
        FROM team_work_states
        WHERE team_project_id = %s
        ORDER BY seq_number
    """
    with conn:
        with conn.cursor() as cur:
            cur.execute(work_state_query, (projectID,))
            work_states_results = cur.fetchall()
    lead_time_states = [work_state for work_state, work_seq in work_states_results
                        if start_state_seq <= work_seq < end_state_seq]
    work_states_dict = {work_seq: work_state for work_state, work_seq in work_states_results}

    # Filter out invalid issue types and resolutions
    issueTypesList = workTypeParser.issueTypesList
    invalidResolutionsList = workTypeParser.invalidResolutionsList

    dateSince = quarters[-1]
    dateUntil = quarters[0]

    # Get all issues within the time range along with their work-state change history
    issue_query = """
        SELECT issue_key,
               listagg(CASE WHEN s1.seq_number IS NULL THEN -1 ELSE s1.seq_number END, ',')
                   within group(ORDER BY issue_change.changed) AS prev_number_seq,
               listagg(CASE WHEN s2.seq_number IS NULL THEN -1 ELSE s2.seq_number END, ',')
                   within group(ORDER BY issue_change.changed) AS new_number_seq,
               listagg(issue_change.changed, ',')
                   within group(ORDER BY issue_change.changed) AS changed_seq
        FROM issue_change
        LEFT JOIN (SELECT team_status_states.team_project_id,
                          team_status_states.status,
                          team_status_states.state_name,
                          team_work_states.seq_number
                   FROM team_status_states
                   LEFT JOIN team_work_states
                     ON team_status_states.team_project_id = team_work_states.team_project_id
                    AND team_status_states.state_name = team_work_states.state_name) s1
          ON s1.team_project_id = issue_change.team_project_id AND s1.status = issue_change.prev_value
        LEFT JOIN (SELECT team_status_states.team_project_id,
                          team_status_states.status,
                          team_status_states.state_name,
                          team_work_states.seq_number
                   FROM team_status_states
                   LEFT JOIN team_work_states
                     ON team_status_states.team_project_id = team_work_states.team_project_id
                    AND team_status_states.state_name = team_work_states.state_name) s2
          ON s2.team_project_id = issue_change.team_project_id AND s2.status = issue_change.new_value
        WHERE issue_change.team_project_id = %s
          AND field_name = 'Status'
          AND (%s = 0 OR issue_type IN %s)
          AND (%s = 0 OR resolution NOT IN %s)
        GROUP BY issue_key
    """
    with conn:
        with conn.cursor() as cur:
            cur.execute(issue_query,
                        (projectID,
                         1 if issueTypesList else 0,
                         tuple(issueTypesList) if issueTypesList else (None,),
                         1 if invalidResolutionsList else 0,
                         tuple(invalidResolutionsList) if invalidResolutionsList else (None,)))
            results = cur.fetchall()

    # Convert results to dict format
    issues = [{"issue_name": result[0],
               "raw_info": zip(result[1].split(","), result[2].split(","), result[3].split(",")),
               "latest_seq": int(result[2].split(",")[-1])}
              for result in results]

    # If the latest/current status is not after the lead time end state, the issue is not done
    # and should be filtered out. This keeps only finished issues in the result set, meaning
    # every remaining issue will have all work-time states and be a finished issue
    issues = [issue for issue in issues if issue["latest_seq"] >= end_state_seq]

    # Still need to filter out issues that were closed before dateSince or after dateUntil
    counter = 0
    issuesToDelete = []
    # Since popping shifts the indices, each recorded index must be offset by the
    # number of pops that will happen before it
    numOfPops = 0
    for issue in issues:
        isIssueDeleted = False
        # Init lead time dictionary
        issue["leadtime"] = {state: 0 for state in lead_time_states}

        # Find the first transition into a lead time state from a pre-lead-time state
        for info in issue["raw_info"]:
            prev_seq_number = int(info[0])
            next_seq_number = int(info[1])
            state_transition_time = int(info[2])
            if prev_seq_number < start_state_seq <= next_seq_number < end_state_seq:
                issue["start_state_time"] = state_transition_time
                break

        # Find the last transition into a post-lead-time state from a lead time state
        for info in reversed(issue["raw_info"]):
            prev_seq_number = int(info[0])
            next_seq_number = int(info[1])
            state_transition_time = int(info[2])
            if start_state_seq <= prev_seq_number < end_state_seq <= next_seq_number:
                issue["end_state_time"] = state_transition_time
                break

        # If the issue was completed before or after the requested time window, remove it
        if ("end_state_time" in issue) and \
                (issue["end_state_time"] < int(dateSince) or issue["end_state_time"] > int(dateUntil)) and \
                isIssueDeleted == False:
            issuesToDelete.append(counter - numOfPops)
            numOfPops = numOfPops + 1
            isIssueDeleted = True

        # Calculate overall lead time
        if issue.get("start_state_time") and issue.get("end_state_time"):
            start_time = datetime.datetime.fromtimestamp(issue["start_state_time"])
            end_time = datetime.datetime.fromtimestamp(issue["end_state_time"])
            issue_working_days = TimeIntervalCalculator.workday_diff(start_time, end_time)
            issue["leadtime"]["Overall"] = float("{0:.2f}".format(issue_working_days))
        # If the needed parameters don't exist, remove the issue
        elif isIssueDeleted == False:
            issuesToDelete.append(counter - numOfPops)
            numOfPops = numOfPops + 1
            isIssueDeleted = True

        # Remove the issue if its lead time is under 15 minutes (0.01 working days)
        # to prevent it from being displayed on the chart as 0
        if ("Overall" in issue["leadtime"]) and issue["leadtime"]["Overall"] < 0.01 and isIssueDeleted == False:
            issuesToDelete.append(counter - numOfPops)
            numOfPops = numOfPops + 1
            isIssueDeleted = True

        counter = counter + 1

    # Filter out issues that did not finish during the time period
    for num in issuesToDelete:
        issues.pop(num)

    for issue in issues:
        # Calculate lead time for each work state
        state_transition_time = -1
        # Loop through the state change history and accumulate lead time for all states
        for info in issue["raw_info"]:
            prev_work_state = work_states_dict.get(int(info[0]))
            new_state_transition_time = int(info[2])
            if prev_work_state in lead_time_states and state_transition_time > 0:
                start_time = datetime.datetime.fromtimestamp(state_transition_time)
                end_time = datetime.datetime.fromtimestamp(new_state_transition_time)
                issue_working_days = TimeIntervalCalculator.workday_diff(start_time, end_time)
                issue["leadtime"][prev_work_state] += issue_working_days
            # Update for looping purposes
            state_transition_time = new_state_transition_time

    payload = []
    # Create the graph data set from the issue data
    for issue in issues:
        obj = {
            'name': issue['issue_name'],
            'workingDays': issue['leadtime']['Overall'],
            'endTime': issue['end_state_time']
        }
        payload.append(obj)

    return response_formatter(status_code=200, body=payload)
def handler(event, context):
    try:
        lambda_preprocessor = VgerGitPRPreprocessor(event)
        # General validation and generation
        lambda_preprocessor.verify_project_id()
        lambda_preprocessor.validate_project_id()
        lambda_preprocessor.generate_query_parameters(category="repo")
        lambda_preprocessor.verify_project_repo()
        lambda_preprocessor.generate_rolling_window_weeks()
        lambda_preprocessor.generate_time_interval_date(trace_back=True)

        rolling_window_weeks = lambda_preprocessor.param["rolling_window_weeks"]
        pr_counts, total_weeks_list = lambda_preprocessor.get_merged_pull_requests()
        num_closed_pull_requests = [data[0] for data in pr_counts]

        rolling_weeks_used = []
        tenth_percentiles = []
        twentieth_percentiles = []
        fiftieth_percentiles = []
        eightieth_percentiles = []
        ninetieth_percentiles = []
        for index in range(len(total_weeks_list)):
            if index + rolling_window_weeks >= len(total_weeks_list):
                break
            closed_pull_requests_subset = num_closed_pull_requests[index:index + rolling_window_weeks]
            sorted_weeks = sorted(closed_pull_requests_subset)
            tenth_percentiles.append(percentile_calculation(0.1, sorted_weeks))
            twentieth_percentiles.append(percentile_calculation(0.2, sorted_weeks))
            fiftieth_percentiles.append(percentile_calculation(0.5, sorted_weeks))
            eightieth_percentiles.append(percentile_calculation(0.8, sorted_weeks))
            ninetieth_percentiles.append(percentile_calculation(0.9, sorted_weeks))
            week = pytz.utc.localize(total_weeks_list[index + rolling_window_weeks]).isoformat()
            rolling_weeks_used.append(week)

        payload = {
            "tenth": zip(rolling_weeks_used, tenth_percentiles),
            "twentieth": zip(rolling_weeks_used, twentieth_percentiles),
            "fiftieth": zip(rolling_weeks_used, fiftieth_percentiles),
            "eightieth": zip(rolling_weeks_used, eightieth_percentiles),
            "ninetieth": zip(rolling_weeks_used, ninetieth_percentiles)
        }
        response = response_formatter(status_code='200', body=payload)
    except LambdaPreprocessorError as e:
        response = e.args[0]
    except Exception as e:
        payload = {'message': 'Internal error: {}'.format(e)}
        response = response_formatter(status_code='500', body=payload)
    return response
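# percentile_calculation is shared by this handler and the lead time handlers
# above; it takes a fraction (0.1 .. 0.9) and an ascending list. A minimal
# stand-in using the same linear interpolation as the R-7 sketch earlier; the
# real helper's edge-case behavior is unknown:
def percentile_calculation(fraction, sorted_values):
    n = len(sorted_values)
    if n == 0:
        return 0
    h = (n - 1) * fraction
    lower = int(h)
    upper = min(lower + 1, n - 1)
    return sorted_values[lower] + (h - lower) * (sorted_values[upper] - sorted_values[lower])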