Example #1
def computeDevActv():
    # key timestamps
    time_now = time.time()
    time_now_str = _strtime_before_days(time_now, 0)
    time_before_1_window = _strtime_before_days(time_now, EXAMINE_WINDOW)

    # cbw/ibw/rbw: commits, issues, and releases within the examination window
    cbw, ibw, rbw = [], [], []
    metrics = [cbw, ibw, rbw]
    for repo in REPOS:
        if repo in NONE_GH:
            cbw.append(None)
            ibw.append(None)
            rbw.append(None)
        else:
            # the key counts for this window
            cbw.append(
                dbop.select_one(
                    "select count(*) from commits_info where repo_id=%s and (author_date>%s and author_date<%s)",
                    (repo, time_before_1_window, time_now_str))[0])
            ibw.append(
                dbop.select_one(
                    "select count(*) from issues_info where repo_id=%s and (created_at>%s and created_at<%s)",
                    (repo, time_before_1_window, time_now_str))[0])
            rbw.append(
                dbop.select_one(
                    "select count(*) from releases_info where repo_id=%s and (created_at>%s and created_at<%s)",
                    (repo, time_before_1_window, time_now_str))[0])
    nor_metrics = [_nor_data(item) for item in metrics]
    for i in range(0, len(REPOS)):
        dbop.execute("insert into dev_actv(repo_id,dev,rel) values(%s,%s,%s)",
                     (REPOS[i], _my_avg([nor_metrics[0][i], nor_metrics[1][i]
                                         ]), nor_metrics[2][i]))
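
A note on the helpers: _nor_data and _my_avg are called throughout this page but are not part of the excerpt. Judging from the call sites (lists that may contain None entries, averages over short lists), minimal sketches consistent with that usage might be; treat both as assumptions, not the project's actual definitions:

def _nor_data(data):
    # Hypothetical sketch: min-max normalize a list into [0, 1], passing
    # None entries through untouched.
    values = [v for v in data if v is not None]
    if not values:
        return data
    lo, hi = min(values), max(values)
    span = (hi - lo) or 1  # avoid a zero denominator when all values match
    return [None if v is None else (v - lo) * 1.0 / span for v in data]

def _my_avg(data):
    # Hypothetical sketch: average of the non-None entries, or None if
    # nothing is left to average.
    values = [v for v in data if v is not None]
    return sum(values) * 1.0 / len(values) if values else None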
Example #2
def computeQualitySub():

    # defect repair ratio, average repair time
    repair_ratio, repair_time = [], []
    metrics = [repair_ratio, repair_time]
    for repo in REPOS:
        if repo in NONE_GH:
            repair_ratio.append(None)
            repair_time.append(None)
        else:
            # total issues vs. completed (closed) issues
            result = dbop.select_all(
                "select closed_at,created_at from issues_info where repo_id=%s and is_pr=0",
                (repo, ))
            total_num = len(result)
            if total_num == 0:
                tmp_repair_ratio = 0
                tmp_repair_time = 0
            else:
                issue_done = [item for item in result if item[0] is not None]
                tmp_repair_ratio = len(issue_done) * 1.0 / total_num
                tmp_repair_time = sum([
                    _datetime2int(item[0]) - _datetime2int(item[1])
                    for item in issue_done
                ]) * 1.0 / len(issue_done)
            repair_ratio.append(tmp_repair_ratio)
            repair_time.append(1.0 / (tmp_repair_time + 1))  # invert so faster repairs score higher

    repair_time = _nor_data(repair_time)
    for i in range(0, len(REPOS)):
        dbop.execute(
            "insert into quality_sub(repo_id,repair_ratio,repair_time) values(%s,%s,%s)",
            (REPOS[i], repair_ratio[i], repair_time[i]))
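
_datetime2int is another helper missing from the excerpt; the subtraction above suggests it converts the datetime values coming back from the driver into epoch seconds. A sketch under that assumption:

import time

def _datetime2int(dt):
    # Hypothetical sketch: datetime.datetime -> epoch seconds, so repair
    # durations can be computed by plain subtraction.
    return int(time.mktime(dt.timetuple()))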
Example #3
def computeINF():
    # key timestamps
    time_now = time.time()
    time_now_str = _strtime_before_days(time_now, 0)
    time_before_1_window = _strtime_before_days(time_now, EXAMINE_WINDOW)

    fans = [[], [], []]
    fans_fb, fans_tw = [], []
    for repo in REPOS:

        if repo in NONE_GH:  # the project has no GitHub repo
            for i in range(0, 3):
                fans[i].append(None)
        else:
            # developer-community fan counts
            fans_now = dbop.select_one(
                "select watch,star,fork from html_info where repo_id=%s and fetched_at<=%s order by fetched_at desc limit 1",
                (repo, time_now_str), (0, 0, 0))
            fans_before = dbop.select_one(
                "select watch,star,fork from html_info where repo_id=%s and fetched_at<=%s order by fetched_at desc limit 1",
                (repo, time_before_1_window), (0, 0, 0))
            # compute the change in each metric; some counts really do shrink!
            for i in range(0, 3):
                fans[i].append(fans_now[i] - fans_before[i])

        # social communities
        if repo in NONE_FB:
            fans_fb.append(None)
        else:
            fb_now = dbop.select_one(
                "select watches_num from facebook_data where coin_id=%s and created_time<=%s order by created_time desc limit 1",
                (repo, time_now_str), (0, ))
            fb_before = dbop.select_one(
                "select watches_num from facebook_data where coin_id=%s and created_time<=%s order by created_time desc limit 1",
                (repo, time_before_1_window), (0, ))
            fans_fb.append(fb_now[0] - fb_before[0])

        if repo in NONE_TW:
            fans_tw.append(None)
        else:
            tw_now = dbop.select_one(
                "select followers_num from twitters_data where coin_id=%s and created_time<=%s order by created_time desc limit 1",
                (repo, time_now_str), (0, ))
            tw_before = dbop.select_one(
                "select followers_num from twitters_data where coin_id=%s and created_time<=%s order by created_time desc limit 1",
                (repo, time_before_1_window), (0, ))
            fans_tw.append(tw_now[0] - tw_before[0])

    # normalize
    fans.extend([fans_fb, fans_tw])
    fans = [_nor_data(item) for item in fans]
    for i in range(0, len(REPOS)):
        tmp_row = []
        for j in range(0, len(fans)):
            tmp_row.append(fans[j][i])
        dbop.execute(
            "insert into inf(repo_id,inf_dev,inf_social) values(%s,%s,%s)",
            (REPOS[i], _my_avg(tmp_row[0:3]), _my_avg(tmp_row[3:])))
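
_strtime_before_days(time_now, n) evidently formats the moment n days before the given epoch timestamp as a datetime string the SQL comparisons can use. A plausible sketch (the exact format string is an assumption):

import time

def _strtime_before_days(ts, days):
    # Hypothetical sketch: format the moment `days` days before epoch
    # timestamp `ts` as a MySQL-style datetime string.
    return time.strftime('%Y-%m-%d %H:%M:%S',
                         time.localtime(ts - days * 24 * 3600))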
def _fetchCommitJson4prj(prj):
	last_page, last_data_set = _get_last_commit_fetch(prj)
	logger.info("\t\t%s: %s last commit page: %s/%s"%( threading.current_thread().name,prj,last_page,len(last_data_set)))

	# commit(id,repo_id,sha,author_id,author_name,author_date,committer_id,committer_name,committer_date,parent)
	while last_page is not None:
		
		# download and store the raw data
		url =  URL_TEMPLATE%(prj,"commits",last_page)
		result, raw_json = _get_url(url)
		if result is None:
			break

		dbop.execute("insert into commits_json_raw(repo_id, page, raw) values(%s,%s,%s)", (
							REPO_ID[prj], last_page, raw_json))
		new_data_set = json.loads(raw_json)

		# extract
		logger.info("\t\t%s: %s new commit page: %s/%s"%( threading.current_thread().name,prj,last_page,len(new_data_set)))
		for n_data in new_data_set:
			if n_data["sha"] not in last_data_set:
				parents_sha = ";".join([parent["sha"] for parent in n_data["parents"]])
				author = n_data["author"] # author在github的用户名有时为空
				if author is not None and len(author)>0:
					author_id,author_name = author["id"],author["login"]
				else:
					author_id,author_name = None,None
				committer = n_data["committer"]
				if committer is not None and len(committer)>0:
					committer_id,committer_name = committer["id"],committer["login"]
				else:
					committer_id,committer_name = None,None
				dbop.execute("insert into commits_info(" + 
								"repo_id,page,sha,author_id,author_name,author_date,committer_id,committer_name,committer_date,parents)" + 
								" values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", 
								( REPO_ID[prj],last_page,n_data["sha"],
								author_id,author_name,_clean_datetime(n_data["commit"]["author"]["date"]),
								committer_id,committer_name,_clean_datetime(n_data["commit"]["committer"]["date"]),parents_sha))
			
		# later pages are all new data, so last_data_set should be empty from now on
		last_data_set = []

		# work out whether there is a next listing page
		if 'link' not in result.headers.keys():
			logger.info("\t\t%s: %s maybe has less 100 commits"%(threading.current_thread().name, prj))
			break
		links = result.headers["link"]
		if "next" in links:
			last_page += 1
		else:
			last_page = None
			logger.info("\t\t%s: %s no longer have next link for commits"%(threading.current_thread().name,prj))
Example #5
def computeScore():
    M1, M2, M3, M4, M5, M6 = {}, {}, {}, {}, {}, {}
    score = []
    dateTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    for repo in REPOS:
        M1[repo] = _my_avg(
            dbop.select_one(
                "select inf_dev,inf_social from inf where repo_id=%s and computed_at<=%s order by id limit 1",
                (repo, dateTime), (0, 0)))
        M2[repo] = _my_avg(
            dbop.select_one(
                "select issue_done, commit_total, age_dev, fans_dev, fans_social from maturity where repo_id=%s and computed_at<=%s order by id limit 1",
                (repo, dateTime), (0, 0)))
        M3[repo] = _my_avg(
            dbop.select_one(
                "select repair_ratio,repair_time from quality_sub where repo_id=%s and computed_at<=%s order by id limit 1",
                (repo, dateTime), (0, 0)))
        M4[repo] = _my_avg(
            dbop.select_one(
                "select  ccr,ngr,tbr from team_health where repo_id=%s and computed_at<=%s order by id limit 1",
                (repo, dateTime), (0, 0)))
        M5[repo] = _my_avg(
            dbop.select_one(
                "select  dev,rel from dev_actv where repo_id=%s and computed_at<=%s order by id limit 1",
                (repo, dateTime), (0, 0)))
        M6[repo] = _my_avg(
            dbop.select_one(
                "select  dit,tit,dcpt,ucpt from trend where repo_id=%s and computed_at<=%s order by id limit 1",
                (repo, dateTime), (0, 0)))
        score.append(
            (repo,
             _my_avg(
                 [M1[repo], M2[repo], M3[repo], M4[repo], M5[repo],
                  M6[repo]])))

    score = sorted(score, key=lambda x: x[1], reverse=True)

    field_sql_str = "prj_id,rank,score,m1_inf,m2_maturity,m3_quality,m4_team_healty,m5_activatin,m6_trend"
    for i in range(0, len(score)):
        repo, r_score = score[i]
        dbop.execute(
            "insert into daily_rank(" + field_sql_str + ") values(%s" +
            ",%s" * 8 + ")", (repo, i + 1, r_score, M1[repo], M2[repo],
                              M3[repo], M4[repo], M5[repo], M6[repo]))
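
Note the three-argument select_one calls: the final tuple is a default row returned when the query matches nothing, which keeps _my_avg fed with numbers for repos with no data (the defaults do not always match the column count, e.g. M2 selects five columns but defaults to (0, 0); _my_avg tolerates that since it averages whatever tuple it receives). The dbop module itself is not shown; a minimal sketch of such a wrapper, assuming the MySQLdb driver:

import MySQLdb  # assumed driver

class DBOp(object):
    def __init__(self, **conn_args):
        self.conn = MySQLdb.connect(**conn_args)

    def select_one(self, sql, params=(), default=None):
        # Return the first matching row, or the caller-supplied default.
        cur = self.conn.cursor()
        cur.execute(sql, params)
        row = cur.fetchone()
        cur.close()
        return row if row is not None else default

    def select_all(self, sql, params=()):
        cur = self.conn.cursor()
        cur.execute(sql, params)
        rows = cur.fetchall()
        cur.close()
        return rows

    def execute(self, sql, params=()):
        cur = self.conn.cursor()
        cur.execute(sql, params)
        self.conn.commit()
        cur.close()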
def _fetchIssueJson4Prj(prj):

	last_page, last_data_set = _get_last_issue_fetch(prj)
	logger.info("\t\t%s: %s last issue page:%s/%s"%( threading.current_thread().name,prj,last_page,len(last_data_set)))
	while last_page is not None:
		
		# download and store the raw data
		url =  URL_TEMPLATE%(prj,"issues",last_page)
		result, raw_json = _get_url(url)
		if result is None:
			break

		dbop.execute("insert into issues_json_raw(repo_id, page, raw) values(%s,%s,%s)", (
							REPO_ID[prj], last_page, raw_json))
		new_data_set = json.loads(raw_json)

		# extract
		logger.info("\t\t%s: %s new issue page:%s/%s"%( threading.current_thread().name,prj,last_page,len(new_data_set)))
		for n_data in new_data_set:
			if n_data["number"] not in last_data_set:
				is_pr = 0
				if "pull_request" in n_data.keys():
					is_pr = 1
				dbop.execute("insert into issues_info" + 
						"(repo_id,number,page,is_pr,created_at,closed_at,user_id,user_name) values (%s,%s,%s,%s,%s,%s,%s,%s)", 
					( REPO_ID[prj],n_data["number"],last_page,is_pr, _clean_datetime(n_data["created_at"]),
									_clean_datetime(n_data["closed_at"]),n_data["user"]["id"],n_data["user"]["login"]
					))
			
		
		# later pages are all new data, so last_data_set should be empty from now on
		last_data_set = []

		# work out whether there is a next listing page
		if 'link' not in result.headers.keys():
			logger.info("\t\t%s: %s maybe has less 100 isse"%(threading.current_thread().name, prj))
			break
		links = result.headers["link"]
		if "next" in links:
			last_page += 1
		else:
			last_page = None
			logger.info("\t\t%s: %s no longer have next link for issue"%(threading.current_thread().name,prj))
def fetchHtmlInfo(prj):

    logger.info("\t\tfetchHtmlInfo:%s" % (prj))
    # download
    logger.info("\t\t download")
    url = "https://github.com/%s" % prj
    url_result = _get_url(url)
    if url_result[0] is None:
        dbop.execute("insert into html_error(repo_id,error_msg) values(%s,%s)",
                     (REPO_ID[prj], url_result[1]))
        return

    logger.info("\t\t extract")
    # extract
    ini_html = url_result[0]
    nums, errors = _extract_html(ini_html)

    logger.info("\t\t store")
    # persist
    fields = nums.keys()
    if len(fields) > 0:
        values = [nums[field] for field in fields]
        values.insert(0, "%d" % REPO_ID[prj])
        dbop.execute(
            "insert into html_info(repo_id," + ",".join(fields) +
            ") values(%s" + ",%s" * len(fields) + ")", (values))
    for error in errors:
        dbop.execute("insert into html_error(repo_id,error_msg) values(%s,%s)",
                     (REPO_ID[prj], error))
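
_extract_html is not included either; from its usage it returns a dict of counters found on the project page plus a list of extraction errors. A rough sketch using regular expressions (the field names and patterns are assumptions, and GitHub's real markup changes often):

import re

def _extract_html(html):
    # Hypothetical sketch: scrape watch/star/fork counts from a GitHub
    # project page; report fields that could not be found.
    nums, errors = {}, []
    patterns = {
        "watch": r'(\d[\d,]*)\s+watching',
        "star": r'(\d[\d,]*)\s+stars?',
        "fork": r'(\d[\d,]*)\s+forks?',
    }
    for field, pattern in patterns.items():
        match = re.search(pattern, html)
        if match:
            nums[field] = match.group(1).replace(",", "")
        else:
            errors.append("failed to extract %s" % field)
    return nums, errors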
def _fetchReleaseJson4Prj(prj):
	last_page, last_data_set = _get_last_release_fetch(prj)
	logger.info("\t\t%s: %s last release page: %s/%s"%( threading.current_thread().name,prj,last_page,len(last_data_set)))

	while last_page is not None:
		
		# download and store the raw data
		url =  URL_TEMPLATE%(prj,"releases",last_page)
		result, raw_json = _get_url(url)
		if result is None:
			break

		dbop.execute("insert into releases_json_raw(repo_id, page, raw) values(%s,%s,%s)", (
							REPO_ID[prj], last_page, raw_json))
		new_data_set = json.loads(raw_json)

		# extract
		logger.info("\t\t%s: %s new release page: %s/%s"%( threading.current_thread().name,prj,last_page,len(new_data_set)))
		for n_data in new_data_set:
			if n_data["id"] not in last_data_set:
				dbop.execute("insert into releases_info(" + 
								"repo_id,r_id,page,tag_name,name,created_at,published_at,author_id,author_name)" + 
								" values(%s,%s,%s,%s,%s,%s,%s,%s,%s)", 
								( REPO_ID[prj],n_data["id"],last_page,n_data["tag_name"],n_data["name"],_clean_datetime(n_data["created_at"]),
								_clean_datetime(n_data["published_at"]),n_data["author"]["id"],n_data["author"]["login"]))
			
		# later pages are all new data, so last_data_set should be empty from now on
		last_data_set = []

		# work out whether there is a next listing page
		if 'link' not in result.headers.keys():
			logger.info("\t\t%s: %s maybe has less 100 releases"%(threading.current_thread().name, prj))
			break
		links = result.headers["link"]
		if "next" in links:
			last_page += 1
		else:
			last_page = None
			logger.info("\t\t%s: %s no longer have next link for releases"%(threading.current_thread().name,prj))
def main():

    # clear the old github_url values first
    dbop.execute("update prj_list set github_url=Null")

    # refill the github_url column
    with open("prjs.txt", "r") as fp:
        for prj_line in fp.readlines():
            prjls = [item.strip() for item in prj_line.split("\t")]

            dbop.execute("update prj_list set github_url=%s where prj_id=%s",
                         (prjls[1], prjls[0]))
    # add a column
    dbop.execute("alter table prj_list add prj_type varchar(50);")
    dbop.execute("update prj_list set prj_type='blockchain';")
Example #10
def computeTrend():
    # key timestamps
    time_now = time.time()
    time_now_str = _strtime_before_days(time_now, 0)
    time_before_1_window = _strtime_before_days(time_now, EXAMINE_WINDOW)
    time_before_2_window = _strtime_before_days(time_now, 2 * EXAMINE_WINDOW)
    time_before_3_window = _strtime_before_days(time_now, 3 * EXAMINE_WINDOW)

    dits, tits, dcpts, ucpts = [], [], [], []
    for repo in REPOS:
        if repo in NONE_GH:
            dits.append(None)
            tits.append(None)
            dcpts.append(None)
        else:
            # dit
            commits_before_1_window = dbop.select_one(
                "select count(*) from commits_info where repo_id=%s and (author_date>%s and author_date<=%s)",
                (repo, time_before_1_window, time_now_str))[0]
            commits_before_2_window = dbop.select_one(
                "select count(*) from commits_info where repo_id=%s and (author_date>%s and author_date<=%s)",
                (repo, time_before_2_window, time_before_1_window))[0]
            commits_before_3_window = dbop.select_one(
                "select count(*) from commits_info where repo_id=%s and (author_date>%s and author_date<=%s)",
                (repo, time_before_3_window, time_before_2_window))[0]
            if (commits_before_2_window - commits_before_3_window) == 0:
                dits.append(
                    ((commits_before_1_window - 2 * commits_before_2_window +
                      commits_before_3_window) + 1.0) /
                    ((commits_before_2_window - commits_before_3_window) +
                     1.0))
            else:
                dits.append(
                    (commits_before_1_window - 2 * commits_before_2_window +
                     commits_before_3_window) * 1.0 /
                    (commits_before_2_window - commits_before_3_window))

            # tit
            issues_before_1_window = dbop.select_one(
                "select count(*) from issues_info where repo_id=%s and is_pr=0 and (created_at>%s and created_at<=%s)",
                (repo, time_before_1_window, time_now_str))[0]
            issues_before_2_window = dbop.select_one(
                "select count(*) from issues_info where repo_id=%s and is_pr=0 and (created_at>%s and created_at<=%s)",
                (repo, time_before_2_window, time_before_1_window))[0]
            issues_before_3_window = dbop.select_one(
                "select count(*) from issues_info where repo_id=%s and is_pr=0 and (created_at>%s and created_at<=%s)",
                (repo, time_before_3_window, time_before_2_window))[0]
            if (issues_before_2_window - issues_before_3_window) == 0:
                tits.append(
                    ((issues_before_1_window - 2 * issues_before_2_window +
                      issues_before_3_window) + 1.0) /
                    ((issues_before_2_window - issues_before_3_window) + 1.0))
            else:
                tits.append(
                    (issues_before_1_window - 2 * issues_before_2_window +
                     issues_before_3_window) * 1.0 /
                    (issues_before_2_window - issues_before_3_window))

            # dcpt
            fans_before_1_window = _my_sum(
                dbop.select_one(
                    "select watch,star,fork from html_info where repo_id=%s and fetched_at<=%s order by fetched_at desc limit 1",
                    (repo, time_now_str), (0, 0, 0)))
            fans_before_2_window = _my_sum(
                dbop.select_one(
                    "select watch,star,fork from html_info where repo_id=%s and fetched_at<=%s order by fetched_at desc limit 1",
                    (repo, time_before_1_window), (0, 0, 0)))
            fans_before_3_window = _my_sum(
                dbop.select_one(
                    "select watch,star,fork from html_info where repo_id=%s and fetched_at<=%s order by fetched_at desc limit 1",
                    (repo, time_before_2_window), (0, 0, 0)))
            if (fans_before_2_window - fans_before_3_window) == 0:
                dcpts.append(
                    ((fans_before_1_window - 2 * fans_before_2_window +
                      fans_before_3_window) + 1.0) /
                    (fans_before_2_window - fans_before_3_window + 1.0))
            else:
                dcpts.append((fans_before_1_window - 2 * fans_before_2_window +
                              fans_before_3_window) * 1.0 /
                             (fans_before_2_window - fans_before_3_window))

        # ucpt
        if repo in NONE_FB and repo in NONE_TW:
            ucpts.append(None)
        else:
            fans_before_1_window = _socialfans_till_time(repo, time_now_str)
            fans_before_2_window = _socialfans_till_time(
                repo, time_before_1_window)
            fans_before_3_window = _socialfans_till_time(
                repo, time_before_2_window)
            if (fans_before_2_window - fans_before_3_window) == 0:
                ucpts.append(
                    ((fans_before_1_window - 2 * fans_before_2_window +
                      fans_before_3_window) + 1.0) /
                    (fans_before_2_window - fans_before_3_window + 1.0))
            else:
                ucpts.append((fans_before_1_window - 2 * fans_before_2_window +
                              fans_before_3_window) * 1.0 /
                             (fans_before_2_window - fans_before_3_window))
    dits, tits, dcpts, ucpts = _nor_data(dits), _nor_data(tits), _nor_data(
        dcpts), _nor_data(ucpts)
    for i in range(0, len(REPOS)):
        dbop.execute(
            "insert into trend(repo_id,dit,tit,dcpt,ucpt) values(%s,%s,%s,%s,%s)",
            (REPOS[i], dits[i], tits[i], dcpts[i], ucpts[i]))
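
Each trend metric is a discrete second difference: with window counts n1 (latest), n2, and n3, the growth in the latest window is n1 - n2, the growth in the window before is n2 - n3, and their difference (n1 - n2) - (n2 - n3) = n1 - 2*n2 + n3 is divided by the earlier growth n2 - n3 (adding 1.0 to both parts when that denominator would be zero). A tiny worked example:

# Commit counts of 50, 40, 35 across the three most recent windows give
# growths of 10 and 5, so growth accelerated by
# (50 - 2*40 + 35) / (40 - 35) = 5 / 5 = 1.0.
n1, n2, n3 = 50, 40, 35
if n2 == n3:
    trend = ((n1 - 2 * n2 + n3) + 1.0) / ((n2 - n3) + 1.0)
else:
    trend = (n1 - 2 * n2 + n3) * 1.0 / (n2 - n3)
print(trend)  # 1.0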
Example #11
def computeTeamHealth():

    # key timestamps
    time_now = time.time()
    time_now_str = _strtime_before_days(time_now, 0)
    time_before_1_window = _strtime_before_days(time_now, EXAMINE_WINDOW)
    time_before_2_window = _strtime_before_days(time_now, 2 * EXAMINE_WINDOW)
    time_before_3_window = _strtime_before_days(time_now, 3 * EXAMINE_WINDOW)

    ccrs, ngrs, tbrs = [], [], []
    for repo in REPOS:
        if repo in NONE_GH:
            ccrs.append(None)
            ngrs.append(None)
            tbrs.append(None)
        else:
            # the distinct author sets for each window
            data_before_1_window = set([
                item[0] for item in dbop.select_all(
                    "select author_id from commits_info where repo_id=%s and author_id is not null and (author_date>%s and author_date<%s)",
                    (repo, time_before_1_window, time_now_str))
            ])
            data_before_2_window = set([
                item[0] for item in dbop.select_all(
                    "select author_id from commits_info where repo_id=%s and author_id is not null and (author_date>%s and author_date<%s)",
                    (repo, time_before_2_window, time_before_1_window))
            ])
            data_before_3_window = set([
                item[0] for item in dbop.select_all(
                    "select author_id from commits_info where repo_id=%s and author_id is not null and (author_date>%s and author_date<%s)",
                    (repo, time_before_3_window, time_before_2_window))
            ])
            # ccr
            data_common = _common_num(data_before_1_window,
                                      data_before_2_window)
            ccrs.append(data_common * 1.0 /
                        (len(data_before_2_window) + 1))  # avoid a zero denominator
            # ngr
            new_users_1 = len(data_before_1_window) - data_common + 1  # avoid a zero denominator
            data_common_2 = _common_num(data_before_3_window,
                                        data_before_2_window)
            new_users_2 = len(
                data_before_2_window) - data_common_2 + 1  # avoid a zero denominator
            ngrs.append((new_users_1 - new_users_2) * 1.0 / new_users_2)
            # tbr for the previous window
            commits_dis = dbop.select_all(
                "select count(*) from commits_info where repo_id=%s and author_id is not null group by author_id",
                (repo, ))
            issues_dis = dbop.select_all(
                "select count(*) from issues_info where repo_id=%s and user_id is not null group by user_id",
                (repo, ))
            tbrs.append(1.0 / (_gini([item[0] for item in commits_dis]) +
                               _gini([item[0] for item in issues_dis]) + 1))

    metrics = []
    metrics.append(_nor_data(ccrs))
    metrics.append(_nor_data(ngrs))
    metrics.append(_nor_data(tbrs))
    for i in range(0, len(REPOS)):
        tmp_row = [REPOS[i]]
        for j in range(0, len(metrics)):
            tmp_row.append(metrics[j][i])
        dbop.execute(
            "insert into team_health(repo_id, ccr,ngr,tbr) values(%s,%s,%s,%s)",
            tmp_row)
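
_common_num and _gini are also defined elsewhere. Plausible sketches, assuming the former is the size of a set intersection and the latter the standard Gini coefficient over contribution counts (so tbr rewards teams whose commits and issues are spread evenly):

def _common_num(set_a, set_b):
    # Hypothetical sketch: how many elements the two sets share.
    return len(set_a & set_b)

def _gini(counts):
    # Hypothetical sketch: Gini coefficient of a list of contribution
    # counts; 0 means perfectly even, values near 1 mean one contributor
    # dominates.
    values = sorted(counts)
    n = len(values)
    total = sum(values)
    if n == 0 or total == 0:
        return 0.0
    # G = 2 * sum(i * x_i) / (n * sum(x)) - (n + 1) / n, with i = 1..n
    weighted = sum((i + 1) * v for i, v in enumerate(values))
    return 2.0 * weighted / (n * total) - (n + 1.0) / n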
Example #12
def computeMaturity():
    # maturity: repo_id, issue_done, commit_total, age_dev, fans_dev

    issue_done, commit_total, age_dev = [], [], []
    stars, watchs, forks = [], [], []
    fans_fb, fans_tw = [], []
    metrics = [
        issue_done, commit_total, age_dev, stars, watchs, forks, fans_fb,
        fans_tw
    ]

    # collect each metric
    for repo_id in REPOS:
        if repo_id in NONE_GH:
            issue_done.append(None)
            commit_total.append(None)
            age_dev.append(None)
            stars.append(None)
            watchs.append(None)
            forks.append(None)
        else:
            # issue_done
            result = dbop.select_one(
                "select count(*) from issues_info where repo_id=%s and is_pr=0 and closed_at is not NULL",
                (repo_id, ))
            issue_done.append(result[0])

            # commit_total
            result = dbop.select_one(
                "select count(*) from commits_info where repo_id=%s",
                (repo_id, ))
            commit_total.append(result[0])

            # age_dev
            result = dbop.select_all(
                "select author_date from commits_info where repo_id =%s",
                (repo_id, ))
            age_dev.append(_continuous_dev_month(result))

            # fans_dev
            result = dbop.select_one(
                "select watch,star,fork from html_info where repo_id=%s order by id desc limit 1",
                (repo_id, ), (0, 0, 0))
            watchs.append(result[0])
            stars.append(result[1])
            forks.append(result[2])

        if repo_id in NONE_FB:
            fans_fb.append(None)
        else:
            # fans_social
            result = dbop.select_one(
                "select watches_num from facebook_data where coin_id=%s order by id desc limit 1",
                (repo_id, ), (0, ))
            fans_fb.append(result[0])

        if repo_id in NONE_TW:
            fans_tw.append(None)
        else:
            result = dbop.select_one(
                "select followers_num from twitters_data where coin_id=%s order by id desc limit 1",
                (repo_id, ), (0, ))
            fans_tw.append(result[0])

    # normalize
    nor_data = []
    for metric in metrics:
        nor_data.append(_nor_data(metric))

    for i in range(0, len(REPOS)):
        tmp_row = [nor_metric[i] for nor_metric in nor_data]
        dbop.execute(
            "insert into maturity(repo_id, issue_done, commit_total, age_dev, fans_dev, fans_social) values(%s,%s,%s,%s,%s,%s)",
            (REPOS[i], tmp_row[0], tmp_row[1], tmp_row[2],
             _my_avg(tmp_row[3:-2]), _my_avg(tmp_row[-2:])))
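
_continuous_dev_month presumably measures how long development has stayed active, based on commit dates. A sketch that counts the distinct calendar months containing at least one commit (the real definition may instead require the months to be consecutive):

def _continuous_dev_month(rows):
    # Hypothetical sketch: rows are 1-tuples of author_date datetimes;
    # count the distinct (year, month) pairs with at least one commit.
    months = set((r[0].year, r[0].month) for r in rows if r[0] is not None)
    return len(months)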
Example #13
def _get_url(url, retry_times=3):
	# assumed signature: this snippet begins mid-function, and the real
	# default for retry_times is not shown in the excerpt
	req = urllib2.Request(url,headers = send_headers)
	try:
		error_msg = None
		result = urllib2.urlopen(req,timeout=20)
		raw_data = result.read().decode('utf-8')
		logger.info("\t\t%s: downloaded:\t%s:%s"%(threading.current_thread().name,url[28:-37],token[1:8]))
	except urllib2.HTTPError as e:
		error_msg = e.code
	except urllib2.URLError as e:
		error_msg = e.reason
	except Exception as e:
		error_msg = str(e)
		
	if error_msg is not None:
		dbop.execute("insert into json_error(url,error) values(%s,%s)", (url, error_msg))
		logger.info("\t\t%s: error_msg:\t%s,%s:%s"%(threading.current_thread().name,error_msg,url[28:-37],token[1:8]))
		if retry_times == 0:
			return None,None
		else:
			logger.info("\t\t%s: retry:\t%s:%s"%(threading.current_thread().name,url[28:-37],token[1:8]))
			return _get_url(url,retry_times-1)
	
	return result, raw_data

def _get_last_issue_fetch(prj):
	# get the page reached on the last fetch, plus the set of data already stored
	last_page = dbop.select_one("select page from issues_json_raw  where repo_id=%s order by id desc limit 1",
								(REPO_ID[prj],), (1,))[0]
	last_data_set = set([ item[0] for item in
						dbop.select_all("select number from issues_info where repo_id=%s and page =%s",
							(REPO_ID[prj], last_page))])
	return last_page, last_data_set