Exemple #1
0
 async def getProxy():
     """Return a formatted proxy URL from the proxy pool, or None.

     None is returned when proxy usage is disabled in the configuration
     (``configPraser.getProxy()`` is falsy) or when
     ``ProxyHelper.getAsyncSingleProxy()`` yields no proxy. See ProxyHelper
     for how the pool itself works.
     """
     if not configPraser.getProxy():
         return None

     candidate = await ProxyHelper.getAsyncSingleProxy()

     # Echo whatever the pool handed back (including None) when print mode is on.
     if configPraser.getPrintMode():
         print(candidate)

     if candidate is None:
         return None
     return StringKeyUtils.STR_PROXY_HTTP_FORMAT.format(candidate)
    async def postGraphqlData(session, api, query=None, args=None):
        """POST a query to the GitHub GraphQL endpoint and return the JSON reply.

        The request is retried until one attempt succeeds; every failed
        attempt (HTTP 403 or any exception raised while requesting/decoding)
        picks a fresh proxy and tries again. Retrying is done with a loop
        rather than self-recursion so that a long streak of failures cannot
        exhaust the interpreter's recursion limit.

        Args:
            session: client session object exposing an async ``post`` context
                manager (used as ``session.post(api, ...)``).
            api: URL of the GraphQL endpoint.
            query: GraphQL query, handed to ``GraphqlHelper.getGraphlQuery``.
            args: GraphQL variables, handed to
                ``GraphqlHelper.getGraphqlVariables``.

        Returns:
            The decoded JSON body of the first successful response.
        """
        while True:
            # Headers and body are rebuilt on every attempt, exactly as the
            # original recursive retry did.
            headers = {}
            headers = AsyncApiHelper.getUserAgentHeaders(headers)
            headers = AsyncApiHelper.getAuthorizationHeaders(headers)
            headers = AsyncApiHelper.getContentTypeHeaders(headers)

            body = {}
            body = GraphqlHelper.getGraphlQuery(body, query)
            body = GraphqlHelper.getGraphqlVariables(body, args)
            bodyJson = json.dumps(body)

            # When proxies are enabled but the pool is empty, wait and poll
            # again instead of sending the request without a proxy.
            while True:
                proxy = await AsyncApiHelper.getProxy()
                if configPraser.getProxy() and proxy is None:
                    print('no proxy and sleep!')
                    await asyncio.sleep(20)
                else:
                    break

            try:
                async with session.post(api,
                                        ssl=False,
                                        proxy=proxy,
                                        headers=headers,
                                        timeout=configPraser.getTimeout(),
                                        data=bodyJson) as response:
                    print(
                        "rate:",
                        response.headers.get(
                            StringKeyUtils.STR_HEADER_RATE_LIMIT_REMIAN))
                    print("status:", response.status)
                    if response.status == 403:
                        # Only score the proxy when one was actually used;
                        # the original dereferenced ``proxy`` unconditionally.
                        if proxy is not None:
                            await ProxyHelper.judgeProxy(
                                proxy.split('//')[1],
                                ProxyHelper.INT_KILL_POINT)
                        # ``raise 403`` is not a valid raise statement; use a
                        # real exception so the retry path is reached on
                        # purpose. As before, the handler below also applies
                        # the negative score to the proxy.
                        raise RuntimeError("HTTP 403 received from " + str(api))
                    if proxy is not None:
                        await ProxyHelper.judgeProxy(
                            proxy.split('//')[1],
                            ProxyHelper.INT_POSITIVE_POINT)
                    return await response.json()
            except Exception as e:
                print(e)
                if proxy is not None:
                    await ProxyHelper.judgeProxy(proxy.split('//')[1],
                                                 ProxyHelper.INT_NEGATIVE_POINT)
                print("judge end")
                # Fall through to the next loop iteration to retry.
    async def fetchBeanData(session, api, isMediaType=False):
        """GET ``api`` and return the decoded JSON reply (general fetch entry).

        The request is retried until one attempt succeeds; every failed
        attempt (HTTP 403 or any exception raised while requesting/decoding)
        picks a fresh proxy and tries again. Retrying is done with a loop
        rather than self-recursion so that a long streak of failures cannot
        exhaust the interpreter's recursion limit.

        Args:
            session: client session object exposing an async ``get`` context
                manager (used as ``session.get(api, ...)``).
            api: URL to request.
            isMediaType: accepted for interface compatibility; it is not read
                by this function.

        Returns:
            The decoded JSON body of the first successful response.
        """
        while True:
            # Request headers are rebuilt on every attempt, exactly as the
            # original recursive retry did.
            headers = {}
            headers = AsyncApiHelper.getUserAgentHeaders(headers)
            # Original note (2020.10.7): using the token seemed to have some
            # problems at the time.
            headers = AsyncApiHelper.getPrivateTokensHeaders(headers)

            # Obtaining a proxy requires the proxy pool to be running. When
            # proxies are enabled but the pool is empty, wait and poll again
            # instead of sending the request without a proxy.
            while True:
                proxy = await AsyncApiHelper.getProxy()
                if configPraser.getProxy() and proxy is None:
                    print('no proxy and sleep!')
                    await asyncio.sleep(20)
                else:
                    break

            try:
                async with session.get(
                        api,
                        ssl=False,
                        proxy=proxy,
                        headers=headers,
                        timeout=configPraser.getTimeout()) as response:
                    print(
                        "rate:",
                        response.headers.get(
                            StringKeyUtils.STR_HEADER_RATE_LIMIT_REMIAN))
                    print("status:", response.status)
                    if response.status == 403:
                        # Only score the proxy when one was actually used;
                        # the original dereferenced ``proxy`` unconditionally.
                        if proxy is not None:
                            await ProxyHelper.judgeProxy(
                                proxy.split('//')[1],
                                ProxyHelper.INT_KILL_POINT)
                        # ``raise 403`` is not a valid raise statement; use a
                        # real exception so the retry path is reached on
                        # purpose. As before, the handler below also applies
                        # the negative score to the proxy.
                        raise RuntimeError("HTTP 403 received from " + str(api))
                    if proxy is not None:
                        await ProxyHelper.judgeProxy(
                            proxy.split('//')[1],
                            ProxyHelper.INT_POSITIVE_POINT)
                    return await response.json()
            except Exception as e:
                # Any failed attempt ends up here: report it, penalize the
                # proxy that was used (if any), then loop to retry.
                print(e)
                if proxy is not None:
                    await ProxyHelper.judgeProxy(proxy.split('//')[1],
                                                 ProxyHelper.INT_NEGATIVE_POINT)
Exemple #4
0
    def getAllDataForProject(owner, repo):
        """Fetch repository-level data for ``owner``/``repo`` and print a summary.

        An ``ApiHelper`` is configured with authorization enabled and with the
        proxy-pool setting taken from ``configPraser.getProxy()``; it is then
        handed to ``ProjectAllDataFetcher.getDataForRepository``. Afterwards
        the counters of a ``statisticsHelper`` instance and the elapsed time
        are printed.

        Args:
            owner: repository owner, forwarded to ``ApiHelper``.
            repo: repository name, forwarded to ``ApiHelper``.
        """
        apiHelper = ApiHelper(owner=owner, repo=repo)
        apiHelper.setAuthorization(True)
        apiHelper.setUseProxyPool(configPraser.getProxy())

        stats = statisticsHelper()
        stats.startTime = datetime.now()

        # Extract the project's information and the project owner's information.
        ProjectAllDataFetcher.getDataForRepository(apiHelper)

        # Extract the project's pull request information.
        # NOTE: this step is currently disabled; ``stats`` is therefore not
        # passed to any fetcher in this function.
        # ProjectAllDataFetcher.getPullRequestForRepositoryUseConcurrent(helper, limit=configPraser.getLimit(),
        #                                                                statistic=statistic,
        #                                                                start=configPraser.getStart())

        stats.endTime = datetime.now()
        elapsed = stats.endTime - stats.startTime

        print(
            "useful pull request:", stats.usefulRequestNumber,
            " useful review:", stats.usefulReviewNumber,
            " useful review comment:", stats.usefulReviewCommentNumber,
            " useful issue comment:", stats.usefulIssueCommentNumber,
            " useful commit:", stats.usefulCommitNumber,
            " cost time:", elapsed.seconds,
        )