def queryValuesFromTable(tableName, items, valueDict): """查询数据库""" ret = [] conn = DataBaseOpenHelper.connect() cursor = conn.cursor() format_values = SqlUtils.getQueryTableConditionString(items) sql = SqlUtils.STR_SQL_QUERY_TABLE_UTILS.format(tableName, format_values) if configPraser.getPrintMode(): print(sql) values = () if items is not None: for item in items: values = values + (valueDict.get(item, None),) # 元组相加 try: cursor.execute(sql, values) ret = cursor.fetchall() if configPraser.getPrintMode(): print(ret) except Exception as e: print(e) conn.close() return ret
async def insertValuesIntoTable(self, tableName, items, valueDict, primaryKeys=None): """插入语句""" conn, cur = await self.getCursor() format_table = SqlUtils.getInsertTableFormatString(tableName, items) format_values = SqlUtils.getInsertTableValuesString(items.__len__()) if configPraser.getPrintMode(): print(format_table) print(format_values) sql = SqlUtils.STR_SQL_INSERT_TABLE_UTILS.format(format_table, format_values) if configPraser.getPrintMode(): print(sql) values = () for item in items: values = values + (valueDict.get(item, None),) # 元组相加 try: await cur.execute(sql, values) except Exception as e: print(e) finally: if cur: await cur.close() await self.pool.release(conn)
def insertValuesIntoTable(tableName, items, valueDict, primaryKeys=None): """插入语句""" res = SqlExecuteHelper.queryValuesFromTable(tableName, primaryKeys, valueDict) if res is not None and res.__len__() > 0: if configPraser.getPrintMode(): print('数据重复插入失败') return conn = DataBaseOpenHelper.connect() cursor = conn.cursor() format_table = SqlUtils.getInsertTableFormatString(tableName, items) format_values = SqlUtils.getInsertTableValuesString(items.__len__()) if configPraser.getPrintMode(): print(format_table) print(format_values) sql = SqlUtils.STR_SQL_INSERT_TABLE_UTILS.format(format_table, format_values) if configPraser.getPrintMode(): print(sql) values = () for item in items: values = values + (valueDict.get(item, None),) # 元组相加 try: cursor.execute(sql, values) conn.commit() except Exception as e: print(e) conn.rollback() conn.close()
def saveUserInformationToDB(helper, user): # user信息录入数据库 先查询数据库再,如果信息不完整再请求 if user is not None and user.login is not None: res = SqlExecuteHelper.queryValuesFromTable(SqlUtils.STR_TABLE_NAME_USER, user.getIdentifyKeys(), user.getValueDict()) if res is None or res.__len__() == 0: if configPraser.getPrintMode(): print('新用户 从git中获取信息') user = helper.getInformationForUser(user.login) SqlExecuteHelper.insertValuesIntoTable(SqlUtils.STR_TABLE_NAME_USER , user.getItemKeyList() , user.getValueDict() , user.getIdentifyKeys()) else: if configPraser.getPrintMode(): print(type(configPraser.getPrintMode())) print('老用户 不必获取')
async def updateBeanDateList(beans, mysql): """一次性更新多个bean对象 讲道理结构是被破坏的,但是可以吧所有数据库请求压缩为一次""" conn, cur = await mysql.getDatabaseConnected() try: for bean in beans: if isinstance(bean, BeanBase): tableName = AsyncSqlHelper.getInsertTableName(bean) valueDict = bean.getValueDict() format_target = SqlUtils.getUpdateTableSetString(bean.getItemKeyList()) format_condition = SqlUtils.getQueryTableConditionString(bean.getIdentifyKeys()) sql = SqlUtils.STR_SQL_UPDATE_TABLE_UTILS.format(tableName, format_target, format_condition) if configPraser.getPrintMode(): print(sql) values = () for item in bean.getItemKeyList(): values = values + (valueDict.get(item, None),) for item in bean.getIdentifyKeys(): values = values + (valueDict.get(item, None),) # 元组相加 try: await cur.execute(sql, values) except Exception as e: print(e) except Exception as e: print(e) finally: if cur: await cur.close() await mysql.pool.release(conn)
async def storeBeanDateList(beans, mysql): """一次性存储多个bean对象 讲道理结构是被破坏的,但是可以吧所有数据库请求压缩为一次""" conn, cur = await mysql.getDatabaseConnected() try: for bean in beans: if isinstance(bean, BeanBase): tableName = AsyncSqlHelper.getInsertTableName(bean) items = bean.getItemKeyList() valueDict = bean.getValueDict() format_table = SqlUtils.getInsertTableFormatString(tableName, items) format_values = SqlUtils.getInsertTableValuesString(items.__len__()) sql = SqlUtils.STR_SQL_INSERT_TABLE_UTILS.format(format_table, format_values) if configPraser.getPrintMode(): print(sql) values = () for item in items: values = values + (valueDict.get(item, None),) # 元组相加 try: await cur.execute(sql, values) except Exception as e: print(e) except Exception as e: print(e) finally: if cur: await cur.close() await mysql.pool.release(conn)
def updateValuesFromTable(tableName, targets, targetsDict, conditions, conditionsDict): """修改某张表""" conn = DataBaseOpenHelper.connect() cursor = conn.cursor() format_target = SqlUtils.getUpdateTableSetString(targets) format_condition = SqlUtils.getQueryTableConditionString(conditions) sql = SqlUtils.STR_SQL_UPDATE_TABLE_UTILS.format(tableName, format_target, format_condition) if configPraser.getPrintMode(): print(sql) values = () if targets is not None: for item in targets: values = values + (targetsDict.get(item, None),) if conditions is not None: for item in conditions: values = values + (conditionsDict.get(item, None),) # 元组相加 try: cursor.execute(sql, values) conn.commit() except Exception as e: print(e) conn.rollback() conn.close()
def getBeansFromTuple(beanClass, columns, dataTuple): result = [] if configPraser.getPrintMode(): print(columns) if isinstance(beanClass, BeanBase): """采用反射机制实例化对象""" obj = beanClass.__class__() itemList = obj.getItemKeyList() for index, item in enumerate(itemList): value = BeanParserHelper.findItemInArray( item, columns, dataTuple) if value is not None: setattr(obj, item, value) result.append(obj) if configPraser.getPrintMode(): print(result.__len__()) return result
async def getProxy(): """获取代理ip池中的ip 详细看 ProxyHelper""" if configPraser.getProxy(): proxy = await ProxyHelper.getAsyncSingleProxy() if configPraser.getPrintMode(): print(proxy) if proxy is not None: return StringKeyUtils.STR_PROXY_HTTP_FORMAT.format(proxy) return None
def connect(): conn = pymssql.connect(configPraser.getDataBaseHost(), configPraser.getDataBaseUserName(), configPraser.getDataBasePassword()) if conn: if configPraser.getPrintMode(): print('数据库连接成功,host:', configPraser.getDataBaseHost(), ' user:', configPraser.getDataBaseUserName()) return conn
async def preProcessUserFollowList(loop, statistic, userList): semaphore = asyncio.Semaphore(configPraser.getSemaphore()) # 对速度做出限制 mysql = await getMysqlObj(loop) if configPraser.getPrintMode(): print("mysql init success") tasks = [asyncio.ensure_future(AsyncApiHelper.downloadUserFollowList(login, semaphore, mysql, statistic)) for login in userList] # 可以通过nodes 过多次嵌套节省请求数量 await asyncio.wait(tasks)
def getBeanIdentifyTuple(bean): res = None if isinstance(bean, BeanBase): valuesDict = bean.getValueDict() identifyKeys = bean.getIdentifyKeys() res = () for item in identifyKeys: res = res + (valuesDict[item],) if configPraser.getPrintMode(): print(res) return res
async def preProcessUnmatchCommits(loop, statistic): semaphore = asyncio.Semaphore(configPraser.getSemaphore()) # 对速度做出限制 mysql = await getMysqlObj(loop) if configPraser.getPrintMode(): print("mysql init success") res = await AsyncSqlHelper.query(mysql, SqlUtils.STR_SQL_QUERY_UNMATCH_COMMITS, None) print(res) tasks = [asyncio.ensure_future(AsyncApiHelper.downloadCommits(item[0], item[1], semaphore, mysql, statistic)) for item in res] # 可以通过nodes 过多次嵌套节省请求数量 await asyncio.wait(tasks)
async def queryBeanData(beans, mysql, defineItems=None): """一次性查询多个bean对象 define 为[[key1,key2], [key3,key4] ...] 返回多个元组 [((),),((),())...]""" conn, cur = await mysql.getDatabaseConnected() resultBeans = [] try: pos = 0 for bean in beans: if isinstance(bean, BeanBase): tableName = AsyncSqlHelper.getInsertTableName(bean) items = defineItems[pos] if items is None: items = bean.getIdentifyKeys() pos += 1 valueDict = bean.getValueDict() format_values = SqlUtils.getQueryTableConditionString( items) sql = SqlUtils.STR_SQL_QUERY_TABLE_UTILS.format( tableName, format_values) if configPraser.getPrintMode(): print(sql) values = () for item in items: values = values + (valueDict.get(item, None), ) # 元组相加 try: await cur.execute(sql, values) r = await cur.fetchall() resultBeans.append(r) except Exception as e: print(e) resultBeans.append(None) except Exception as e: print(e) finally: if cur: await cur.close() await mysql.pool.release(conn) return resultBeans
async def preProcess(loop, limit, start, statistic): """准备工作""" semaphore = asyncio.Semaphore(configPraser.getSemaphore()) # 对速度做出限制 """初始化数据库""" mysql = await getMysqlObj(loop) if configPraser.getPrintMode(): print("mysql init success") """多协程""" if configPraser.getApiVersion() == StringKeyUtils.API_VERSION_RESET: tasks = [asyncio.ensure_future(AsyncApiHelper.downloadInformation(pull_number, semaphore, mysql, statistic)) for pull_number in range(start, max(start - limit, 0), -1)] elif configPraser.getApiVersion() == StringKeyUtils.API_VERSION_GRAPHQL: tasks = [ asyncio.ensure_future(AsyncApiHelper.downloadInformationByV4(pull_number, semaphore, mysql, statistic)) for pull_number in range(start, max(start - limit, 0), -1)] await asyncio.wait(tasks)
async def preProcessUnmatchCommitFile(loop, statistic): semaphore = asyncio.Semaphore(configPraser.getSemaphore()) # 对速度做出限制 mysql = await getMysqlObj(loop) if configPraser.getPrintMode(): print("mysql init success") print("mysql init success") fetch_size = 2000 total = await AsyncSqlHelper.query(mysql, SqlUtils.STR_SQL_QUERY_UNMATCH_COMMIT_FILE_COUNT_BY_HAS_FETCHED_FILE, None) fetch_loop = int(total[0][0] / fetch_size) for i in range(0, fetch_loop): start = random.randint(0, fetch_loop - 1) res = await AsyncSqlHelper.query(mysql, SqlUtils.STR_SQL_QUERY_UNMATCH_COMMIT_FILE_BY_HAS_FETCHED_FILE, [start * fetch_size]) print(res) tasks = [ asyncio.ensure_future(AsyncApiHelper.downloadCommits(item[0], item[1], semaphore, mysql, statistic)) for item in res] # 可以通过nodes 过多次嵌套节省请求数量 await asyncio.wait(tasks)
def deleteValuesFromTable(tableName, items, valueDict): """删除某张表""" conn = DataBaseOpenHelper.connect() cursor = conn.cursor() format_values = SqlUtils.getQueryTableConditionString(items) sql = SqlUtils.STR_SQL_DELETE_TABLE_UTILS.format(tableName, format_values) if configPraser.getPrintMode(): print(sql) values = () if items is not None: for item in items: values = values + (valueDict.get(item, None),) # 元组相加 try: cursor.execute(sql, values) conn.commit() except Exception as e: print(e) conn.rollback() conn.close()
async def preProcessNoOriginLineReviewComment(loop, statistic, owner, repo, min_num, max_num): semaphore = asyncio.Semaphore(configPraser.getSemaphore()) # 对速度做出限制 mysql = await getMysqlObj(loop) if configPraser.getPrintMode(): print("mysql init success") print("mysql init success") repoName = owner + '/' + repo values = [repoName, repoName, min_num, max_num] total = await AsyncSqlHelper.query(mysql, SqlUtils.STR_SQL_QUERY_NO_ORIGINAL_LINE_REVIEW_COMMENT_COUNT, values) fetch_loop = math.ceil(total[0][0] / 2000) for i in range(0, fetch_loop): res = await AsyncSqlHelper.query(mysql, SqlUtils.STR_SQL_QUERY_NO_ORIGINAL_LINE_REVIEW_COMMENT , values) print("fetched size:", res.__len__()) tasks = [asyncio.ensure_future( AsyncApiHelper.downloadSingleReviewComment(repoName, item[0], semaphore, mysql, statistic)) for item in res] # 可以通过nodes 过多次嵌套节省请求数量 await asyncio.wait(tasks)
def demo(): data = pandasHelper.readTSVFile(projectConfig.getFPSTestData(), pandasHelper.INT_READ_FILE_WITHOUT_HEAD) print("input data:", data.shape) startTime = datetime.now() # print(DataFrameColumnUtils.COLUMN_REVIEW_FPS) """导入pullrequest, review,file,commit数据""" pullrequests, pullrequestsIndex = \ BeanNumpyHelper.getBeansFromDataFrame(PullRequest(), DataFrameColumnUtils.COLUMN_REVIEW_FPS_PULL_REQUEST, data) # if configPraser.getPrintMode(): # print(pullrequests.__len__()) # print(pullrequestsIndex) time2 = datetime.now() print("pull request cost time:", time2 - startTime) reviews, reviewsIndex = BeanNumpyHelper.getBeansFromDataFrame(Review(), DataFrameColumnUtils.COLUMN_REVIEW_FPS_REVIEW, data) time3 = datetime.now() print("review cost time:", time3 - time2) if configPraser.getPrintMode(): print(reviews) print(reviewsIndex) commits, commitsIndex = BeanNumpyHelper.getBeansFromDataFrame(Commit(), DataFrameColumnUtils.COLUMN_REVIEW_FPS_COMMIT, data) time4 = datetime.now() print("commits cost time:", time4 - time3) # if configPraser.getPrintMode(): # print(commits) # print(commitsIndex) files, filesIndex = BeanNumpyHelper.getBeansFromDataFrame(File(), DataFrameColumnUtils.COLUMN_REVIEW_FPS_FILE, data) time5 = datetime.now() print("file cost time:", time5 - time4) # if configPraser.getPrintMode(): # print(files) # print(filesIndex) pullrequestReviewIndex = BeanNumpyHelper.beanAssociate(pullrequests, [StringKeyUtils.STR_KEY_REPO_FULL_NAME, StringKeyUtils.STR_KEY_NUMBER], reviews, [StringKeyUtils.STR_KEY_REPO_FULL_NAME, StringKeyUtils.STR_KEY_PULL_NUMBER]) time6 = datetime.now() print("pull request index time:", time6 - time5) # if configPraser.getPrintMode(): # print(pullrequestReviewIndex) reviewCommitIndex = BeanNumpyHelper.beanAssociate(reviews, [StringKeyUtils.STR_KEY_COMMIT_ID], commits, [StringKeyUtils.STR_KEY_SHA]) time7 = datetime.now() print("commits index cost time:", time7 - time6) # # if configPraser.getPrintMode(): # print(reviewCommitIndex) commitFileIndex = BeanNumpyHelper.beanAssociate(commits, [StringKeyUtils.STR_KEY_SHA], files, [StringKeyUtils.STR_KEY_COMMIT_SHA]) time8 = datetime.now() print("files index cost time:", time8 - time7) # if configPraser.getPrintMode(): # print(commitFileIndex) receiveTime = datetime.now() print("load cost time:", receiveTime - startTime) """用于做评价的结果收集""" recommendList = [] answerList = [] testNumber = configPraser.getTestNumber() if configPraser.getFPSCtypes(): """调用dll库实现增加运行速度""" dll = CDLL("cFPS.dll") dll.addf.restype = c_float dll.addf.argtypes = [c_float, c_float] print(dll.addf(10, 30)) c_prs = FPSClassCovert.convertPullRequest(pullrequests) c_reviews = FPSClassCovert.convertReview(reviews) c_commits = FPSClassCovert.convertCommit(commits) c_files = FPSClassCovert.convertFile(files) c_result = c_fps_result() print(c_prs) print(c_reviews) print(c_commits) print(c_files) dll.FPS.restype = None dll.FPS.argtypes = (POINTER(c_fps_pr), c_int, POINTER(c_fps_review), c_int, POINTER(c_fps_commit), c_int, POINTER(c_fps_file), c_int, POINTER(c_fps_result), c_int, c_int) prs_num = c_prs.__len__() p_c_prs = (c_fps_pr * prs_num)(*c_prs) reviews_num = c_reviews.__len__() p_c_reviews = (c_fps_review * reviews_num)(*c_reviews) commits_num = c_commits.__len__() p_c_commits = (c_fps_commit * commits_num)(*c_commits) files_num = c_files.__len__() p_c_files = (c_fps_file * files_num)(*c_files) dll.FPS(p_c_prs, prs_num, p_c_reviews, reviews_num, p_c_commits, commits_num, p_c_files, files_num, pointer(c_result), 0, 10, True) endTime = datetime.now() print("total cost time:", endTime - startTime, " recommend cost time:", endTime - receiveTime) print("answer:", str(c_result.answer, encoding='utf-8')) print("recommend:", str(c_result.recommend, encoding='utf-8')) else: """使用Python实现算法""" for pos in range(0, testNumber): """通过review算法获取推荐名单""" candicateList, authorList = FPSAlgorithm.reviewerRecommend(pullrequests, pullrequestsIndex, reviews, reviewsIndex, commits, commitsIndex, files, filesIndex, pullrequestReviewIndex, reviewCommitIndex, commitFileIndex, pos, configPraser.getReviewerNumber()) print("candicateList", candicateList) endTime = datetime.now() print("total cost time:", endTime - startTime, " recommend cost time:", endTime - receiveTime) recommendList.append(candicateList) answerList.append(authorList)