def computeDevActv():
    """Compute the development-activity metrics and store them in ``dev_actv``.

    For every entry of ``REPOS`` the rows of ``commits_info``, ``issues_info``
    and ``releases_info`` whose timestamp lies strictly between
    ``_strtime_before_days(now, EXAMINE_WINDOW)`` and
    ``_strtime_before_days(now, 0)`` are counted.  Repos listed in
    ``NONE_GH`` get ``None`` for all three counts.  Each column is then
    passed through ``_nor_data`` and one row per repo is inserted:
    ``dev`` is ``_my_avg`` of the commit and issue values, ``rel`` is the
    release value.
    """
    # Key points in time delimiting the examined window.
    now = time.time()
    window_end = _strtime_before_days(now, 0)
    window_start = _strtime_before_days(now, EXAMINE_WINDOW)

    # One count query per metric, in column order: commits, issues, releases.
    count_queries = (
        "select count(*) from commits_info where repo_id=%s and (author_date>%s and author_date<%s)",
        "select count(*) from issues_info where repo_id=%s and (created_at>%s and created_at<%s)",
        "select count(*) from releases_info where repo_id=%s and (created_at>%s and created_at<%s)",
    )
    columns = [[] for _ in count_queries]

    for repo in REPOS:
        on_github = repo not in NONE_GH
        for column, sql in zip(columns, count_queries):
            if on_github:
                row = dbop.select_one(sql, (repo, window_start, window_end))
                column.append(row[0])
            else:
                column.append(None)

    commits_nor, issues_nor, releases_nor = [_nor_data(column) for column in columns]

    for idx, repo in enumerate(REPOS):
        dbop.execute(
            "insert into dev_actv(repo_id,dev,rel) values(%s,%s,%s)",
            (repo,
             _my_avg([commits_nor[idx], issues_nor[idx]]),
             releases_nor[idx]))
def _socialfans_till_time(repo, dateTime):
    """Return the combined Facebook + Twitter fan count of *repo* at *dateTime*.

    The newest ``facebook_data.watches_num`` and ``twitters_data.followers_num``
    rows with ``created_time <= dateTime`` are read and their values added.
    ``(0, )`` is handed to ``dbop.select_one`` as its third argument
    (presumably the fallback row when nothing matches -- confirm in dbop).
    """
    facebook_sql = "select watches_num from facebook_data where coin_id=%s and created_time<=%s order by created_time desc limit 1"
    twitter_sql = "select followers_num from twitters_data where coin_id=%s and created_time<=%s order by created_time desc limit 1"

    lookup_args = (repo, dateTime)
    facebook_row = dbop.select_one(facebook_sql, lookup_args, (0, ))
    twitter_row = dbop.select_one(twitter_sql, lookup_args, (0, ))

    facebook_fans, twitter_fans = facebook_row[0], twitter_row[0]
    return facebook_fans + twitter_fans
def _get_last_commit_fetch(prj):
    """Return the last fetched commit page of *prj* and the SHAs stored for it.

    Returns a ``(last_page, sha_set)`` tuple: ``last_page`` is the ``page`` of
    the newest ``commits_json_raw`` row (by ``id``) for ``REPO_ID[prj]``, and
    ``sha_set`` is the set of ``sha`` values in ``commits_info`` recorded for
    that repo and page.  ``(1,)`` is passed to ``dbop.select_one`` as its
    third argument (presumably the fallback row when no page was fetched yet
    -- confirm in dbop).
    """
    repo_id = REPO_ID[prj]

    page_row = dbop.select_one(
        "select page from commits_json_raw where repo_id=%s order by id desc limit 1",
        (repo_id,), (1,))
    last_page = page_row[0]

    sha_rows = dbop.select_all(
        "select sha from commits_info where repo_id=%s and page =%s",
        (repo_id, last_page))
    known_shas = {row[0] for row in sha_rows}

    return last_page, known_shas
def _get_last_issue_fetch(prj):
    """Return the last fetched issue page of *prj* and the issue numbers on it.

    Returns a ``(last_page, number_set)`` tuple: ``last_page`` is the ``page``
    of the newest ``issues_json_raw`` row (by ``id``) for ``REPO_ID[prj]``,
    and ``number_set`` is the set of ``number`` values in ``issues_info``
    recorded for that repo and page.  ``(1,)`` is passed to
    ``dbop.select_one`` as its third argument (presumably the fallback row
    when no page was fetched yet -- confirm in dbop).
    """
    repo_id = REPO_ID[prj]

    # Page recorded by the previous fetch.
    page_row = dbop.select_one(
        "select page from issues_json_raw where repo_id=%s order by id desc limit 1",
        (repo_id,), (1,))
    last_page = page_row[0]

    # Issue numbers already stored for that page.
    number_rows = dbop.select_all(
        "select number from issues_info where repo_id=%s and page =%s",
        (repo_id, last_page))
    known_numbers = {row[0] for row in number_rows}

    return last_page, known_numbers
def computeINF():
    """Compute the influence metrics and store them in ``inf``.

    For every entry of ``REPOS`` five deltas are collected between "now" and
    ``EXAMINE_WINDOW`` earlier (both timestamps come from
    ``_strtime_before_days``): the change of the three ``html_info`` columns
    ``watch``, ``star``, ``fork`` (``None`` for repos in ``NONE_GH``), the
    change of ``facebook_data.watches_num`` (``None`` for repos in
    ``NONE_FB``) and the change of ``twitters_data.followers_num`` (``None``
    for repos in ``NONE_TW``).  Each column is normalised with ``_nor_data``;
    ``inf_dev`` is ``_my_avg`` of the three developer-community values and
    ``inf_social`` is ``_my_avg`` of the two social values.
    """
    # The two points in time that are compared.
    now = time.time()
    now_str = _strtime_before_days(now, 0)
    window_start_str = _strtime_before_days(now, EXAMINE_WINDOW)

    html_sql = "select watch,star,fork from html_info where repo_id=%s and fetched_at<=%s order by fetched_at desc limit 1"
    facebook_sql = "select watches_num from facebook_data where coin_id=%s and created_time<=%s order by created_time desc limit 1"
    twitter_sql = "select followers_num from twitters_data where coin_id=%s and created_time<=%s order by created_time desc limit 1"

    dev_deltas = [[], [], []]
    facebook_deltas, twitter_deltas = [], []

    for repo in REPOS:
        # Developer community.
        if repo in NONE_GH:
            # The project has no GitHub presence.
            for column in dev_deltas:
                column.append(None)
        else:
            current = dbop.select_one(html_sql, (repo, now_str), (0, 0, 0))
            previous = dbop.select_one(
                html_sql, (repo, window_start_str), (0, 0, 0))
            # Deltas can be negative: the counters really do shrink sometimes.
            for position, column in enumerate(dev_deltas):
                column.append(current[position] - previous[position])

        # Social community.
        if repo in NONE_FB:
            facebook_deltas.append(None)
        else:
            current = dbop.select_one(facebook_sql, (repo, now_str), (0, ))
            previous = dbop.select_one(
                facebook_sql, (repo, window_start_str), (0, ))
            facebook_deltas.append(current[0] - previous[0])

        if repo in NONE_TW:
            twitter_deltas.append(None)
        else:
            current = dbop.select_one(twitter_sql, (repo, now_str), (0, ))
            previous = dbop.select_one(
                twitter_sql, (repo, window_start_str), (0, ))
            twitter_deltas.append(current[0] - previous[0])

    # Normalise every column: three developer columns first, then the two
    # social ones.
    normalized = [
        _nor_data(column)
        for column in dev_deltas + [facebook_deltas, twitter_deltas]
    ]

    for idx, repo in enumerate(REPOS):
        row = [column[idx] for column in normalized]
        dbop.execute(
            "insert into inf(repo_id,inf_dev,inf_social) values(%s,%s,%s)",
            (repo, _my_avg(row[:3]), _my_avg(row[3:])))
def computeScore():
    """Rank all repos by their overall score and store it in ``daily_rank``.

    For every entry of ``REPOS`` six sub-scores are read, each being
    ``_my_avg`` of the most recent row (highest ``id`` with
    ``computed_at <= now``) of one metric table:

    ==  ===============  ====================================================
    M1  ``inf``          inf_dev, inf_social
    M2  ``maturity``     issue_done, commit_total, age_dev, fans_dev,
                         fans_social
    M3  ``quality_sub``  repair_ratio, repair_time
    M4  ``team_health``  ccr, ngr, tbr
    M5  ``dev_actv``     dev, rel
    M6  ``trend``        dit, tit, dcpt, ucpt
    ==  ===============  ====================================================

    The overall score is ``_my_avg`` of the six sub-scores.  Repos are sorted
    by that score in descending order and one ``daily_rank`` row per repo is
    inserted with its 1-based rank.

    Fix relative to the previous version: the lookups used
    ``order by id limit 1`` (ascending), which returns the *oldest* stored
    row instead of the latest one; they now use ``order by id desc``.
    """
    # (columns, table) for M1..M6, in that order.
    metric_sources = (
        ("inf_dev,inf_social", "inf"),
        ("issue_done, commit_total, age_dev, fans_dev, fans_social", "maturity"),
        ("repair_ratio,repair_time", "quality_sub"),
        ("ccr,ngr,tbr", "team_health"),
        ("dev,rel", "dev_actv"),
        ("dit,tit,dcpt,ucpt", "trend"),
    )
    # Only the constants above are interpolated; runtime values stay bound
    # through the driver's %s placeholders.
    latest_row_sql = ("select {columns} from {table} where repo_id=%s "
                      "and computed_at<=%s order by id desc limit 1")
    metric_queries = [
        latest_row_sql.format(columns=columns, table=table)
        for columns, table in metric_sources
    ]

    dateTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

    sub_scores = {}
    score = []
    for repo in REPOS:
        # (0, 0) is handed to select_one as its third argument for every
        # table, exactly as before (presumably the fallback row -- confirm
        # in dbop).
        values = [
            _my_avg(dbop.select_one(sql, (repo, dateTime), (0, 0)))
            for sql in metric_queries
        ]
        sub_scores[repo] = values
        score.append((repo, _my_avg(values)))

    # Highest score first; the sort is stable, so ties keep REPOS order.
    score.sort(key=lambda entry: entry[1], reverse=True)

    # NOTE(review): `rank` is a reserved word in newer MySQL releases; if the
    # target server is affected this column name needs quoting -- confirm.
    field_sql_str = "prj_id,rank,score,m1_inf,m2_maturity,m3_quality,m4_team_healty,m5_activatin,m6_trend"
    insert_sql = ("insert into daily_rank(" + field_sql_str + ") values(%s"
                  + ",%s" * 8 + ")")
    for position, (repo, r_score) in enumerate(score):
        dbop.execute(
            insert_sql,
            (repo, position + 1, r_score) + tuple(sub_scores[repo]))
def _window_trend_ratio(latest, middle, oldest):
    """Return the trend ratio of three consecutive window values.

    The numerator is ``latest - 2 * middle + oldest`` and the denominator is
    ``middle - oldest``.  When the denominator is zero, ``1.0`` is added to
    both numerator and denominator so the division stays defined.
    """
    numerator = latest - 2 * middle + oldest
    denominator = middle - oldest
    if denominator == 0:
        return (numerator + 1.0) / (denominator + 1.0)
    return numerator / denominator


def computeTrend():
    """Compute the trend metrics and store them in ``trend``.

    Three consecutive windows of ``EXAMINE_WINDOW`` length ending "now" are
    examined (boundaries come from ``_strtime_before_days``).  Per entry of
    ``REPOS`` four values are produced with ``_window_trend_ratio``:

    * ``dit``  -- from the number of commits in each window,
    * ``tit``  -- from the number of non-PR issues created in each window,
    * ``dcpt`` -- from ``_my_sum`` of the latest ``html_info``
      watch/star/fork row at the end of each of the three most recent
      boundaries,
    * ``ucpt`` -- from ``_socialfans_till_time`` at the same boundaries.

    ``dit``, ``tit`` and ``dcpt`` are ``None`` for repos in ``NONE_GH``;
    ``ucpt`` is ``None`` for repos that are in both ``NONE_FB`` and
    ``NONE_TW``.  Each column is normalised with ``_nor_data`` before one row
    per repo is inserted.

    Fixes relative to the previous version:

    * the social check used ``repo is NONE_FB`` (identity against the
      container, always false) instead of ``repo in NONE_FB``, so the
      ``None`` branch for ``ucpt`` could never be taken;
    * the issue-trend list carried a garbled name that is not a valid
      assignment target; it is now a regular local;
    * the four copies of the ratio formula are shared through
      ``_window_trend_ratio`` (arithmetic unchanged).
    """
    # Key points in time: now and one, two, three windows back.
    now = time.time()
    time_now_str = _strtime_before_days(now, 0)
    time_before_1_window = _strtime_before_days(now, EXAMINE_WINDOW)
    time_before_2_window = _strtime_before_days(now, 2 * EXAMINE_WINDOW)
    time_before_3_window = _strtime_before_days(now, 3 * EXAMINE_WINDOW)

    # (start, end] bounds of the windows, most recent first.
    windows = (
        (time_before_1_window, time_now_str),
        (time_before_2_window, time_before_1_window),
        (time_before_3_window, time_before_2_window),
    )
    # Snapshot instants for the cumulative fan counters, most recent first.
    snapshots = (time_now_str, time_before_1_window, time_before_2_window)

    commit_sql = "select count(*) from commits_info where repo_id=%s and (author_date>%s and author_date<=%s)"
    issue_sql = "select count(*) from issues_info where repo_id=%s and is_pr=0 and (created_at>%s and created_at<=%s)"
    html_sql = "select watch,star,fork from html_info where repo_id=%s and fetched_at<=%s order by fetched_at desc limit 1"

    dits, tit_values, dcpts, ucpts = [], [], [], []
    for repo in REPOS:
        if repo in NONE_GH:
            dits.append(None)
            tit_values.append(None)
            dcpts.append(None)
        else:
            # dit
            commits = [
                dbop.select_one(commit_sql, (repo, start, end))[0]
                for start, end in windows
            ]
            dits.append(_window_trend_ratio(*commits))

            # tit
            issues = [
                dbop.select_one(issue_sql, (repo, start, end))[0]
                for start, end in windows
            ]
            tit_values.append(_window_trend_ratio(*issues))

            # dcpt
            dev_fans = [
                _my_sum(dbop.select_one(html_sql, (repo, until), (0, 0, 0)))
                for until in snapshots
            ]
            dcpts.append(_window_trend_ratio(*dev_fans))

        # ucpt: skipped only when the repo has neither Facebook nor Twitter.
        if repo in NONE_FB and repo in NONE_TW:
            ucpts.append(None)
        else:
            social_fans = [
                _socialfans_till_time(repo, until) for until in snapshots
            ]
            ucpts.append(_window_trend_ratio(*social_fans))

    dits, tit_values, dcpts, ucpts = (
        _nor_data(dits), _nor_data(tit_values),
        _nor_data(dcpts), _nor_data(ucpts))

    for idx, repo in enumerate(REPOS):
        dbop.execute(
            "insert into trend(repo_id,dit,tit,dcpt,ucpt) values(%s,%s,%s,%s,%s)",
            (repo, dits[idx], tit_values[idx], dcpts[idx], ucpts[idx]))
def computeMaturity():
    """Compute the maturity metrics and store them in ``maturity``.

    Per entry of ``REPOS`` eight raw values are collected:

    * closed non-PR issue count, total commit count, and
      ``_continuous_dev_month`` of all commit ``author_date`` rows,
    * the three columns of the newest ``html_info`` row (watch, star, fork),
    * the newest ``facebook_data.watches_num`` and
      ``twitters_data.followers_num``.

    The first six are ``None`` for repos in ``NONE_GH``; the Facebook and
    Twitter values are ``None`` for repos in ``NONE_FB`` / ``NONE_TW``
    respectively.  Each column is normalised with ``_nor_data``.  The stored
    row is: issue_done, commit_total, age_dev, ``fans_dev`` (``_my_avg`` of
    the three html_info values) and ``fans_social`` (``_my_avg`` of the
    Facebook and Twitter values).
    """
    issue_done, commit_total, age_dev = [], [], []
    # Named after the html_info column each list actually receives.
    watch_counts, star_counts, fork_counts = [], [], []
    facebook_fans, twitter_fans = [], []

    github_columns = (
        issue_done, commit_total, age_dev,
        watch_counts, star_counts, fork_counts,
    )
    # Column order matters: the insert below slices by position.
    all_columns = list(github_columns) + [facebook_fans, twitter_fans]

    # Collect every raw metric.
    for repo_id in REPOS:
        if repo_id in NONE_GH:
            for column in github_columns:
                column.append(None)
        else:
            # issue_done
            closed_issues = dbop.select_one(
                "select count(*) from issues_info where repo_id=%s and is_pr=0 and closed_at is not NULL",
                (repo_id, ))
            issue_done.append(closed_issues[0])

            # commit_total
            commits = dbop.select_one(
                "select count(*) from commits_info where repo_id=%s",
                (repo_id, ))
            commit_total.append(commits[0])

            # age_dev
            commit_dates = dbop.select_all(
                "select author_date from commits_info where repo_id =%s",
                (repo_id, ))
            age_dev.append(_continuous_dev_month(commit_dates))

            # fans_dev
            html_row = dbop.select_one(
                "select watch,star,fork from html_info where repo_id=%s order by id desc limit 1",
                (repo_id, ), (0, 0, 0))
            watch_counts.append(html_row[0])
            star_counts.append(html_row[1])
            fork_counts.append(html_row[2])

        # fans_social
        if repo_id in NONE_FB:
            facebook_fans.append(None)
        else:
            facebook_row = dbop.select_one(
                "select watches_num from facebook_data where coin_id=%s order by id desc limit 1",
                (repo_id, ), (0, ))
            facebook_fans.append(facebook_row[0])

        if repo_id in NONE_TW:
            twitter_fans.append(None)
        else:
            twitter_row = dbop.select_one(
                "select followers_num from twitters_data where coin_id=%s order by id desc limit 1",
                (repo_id, ), (0, ))
            twitter_fans.append(twitter_row[0])

    # Normalise.
    normalized = [_nor_data(column) for column in all_columns]

    for idx, repo_id in enumerate(REPOS):
        row = [column[idx] for column in normalized]
        dbop.execute(
            "insert into maturity(repo_id, issue_done, commit_total, age_dev, fans_dev, fans_social) values(%s,%s,%s,%s,%s,%s)",
            (repo_id, row[0], row[1], row[2],
             _my_avg(row[3:-2]), _my_avg(row[-2:])))