예제 #1
0
    def spider(item):
        nonlocal total_count
        nonlocal post_poor
        nonlocal error_count
        error = True
        try:
            total_count += 1
            if error:
                for themelinkfun in themes:
                    if not error:
                        break
                    error = themelinkfun.get_last_post(item, post_poor)
            if error:
                print("-----------获取主页信息失败,采取sitemap策略----------")
                error, post_poor = sitmap_get(item, post_poor)

        except Exception as e:
            print('\n')
            print(item, "运用主页及sitemap爬虫爬取失败!请检查")
            print('\n')
            print(e)
            error_count += 1

        if error: error = 'true'
        else: error = 'false'
        item.append(error)
        return item
예제 #2
0
 def spider(item):
     nonlocal total_count
     nonlocal post_poor
     nonlocal error_count
     error = 'false'
     try:
         total_count += 1
         error = butterfly.get_last_post_from_butterfly(item, post_poor)
         if error == 'true':
             error = matery.get_last_post_from_matery(item, post_poor)
         if error == 'true':
             error = volantis.get_last_post_from_volantis(item, post_poor)
         if error == 'true':
             print("-----------获取主页信息失败,采取sitemap策略----------")
             error, post_poor = sitmap_get(item, post_poor)
     except Exception as e:
         print('\n')
         print(item, "运用主页及sitemap爬虫爬取失败!请检查")
         print('\n')
         print(e)
         error_count += 1
     item.append(error)
     return item
예제 #3
0
def main():
    """Run the full job: collect friend links, crawl their posts, upload both.

    Steps, in order:

    1. Initialise leancloud, using ``configs`` when ``configs.DEBUG`` is
       set and ``sys.argv[1]``/``sys.argv[2]`` (app id, app key) otherwise.
       The friend-page link comes from ``configs.FRIENPAGE_LINK`` or
       ``sys.argv[3]`` respectively.
    2. Fill ``friend_poor`` from the optional gitee/github sources and from
       the butterfly, matery and volantis friend-page parsers, then
       de-duplicate it and apply the block list.
    3. Crawl each friend's posts into ``post_poor``, appending a
       ``'true'``/``'false'`` failure flag to every friend entry.
    4. Upload the friend entries, then the posts sorted by ``'time'``
       (newest first).

    Failures of individual sources or friends are printed and skipped so
    that one bad site does not abort the whole run.
    """

    def banner(message):
        # Three-line console banner that marks the start/end of a stage.
        print('----------------------')
        print(message)
        print('----------------------')

    # Set up leancloud credentials.
    if configs.DEBUG:
        leancloud.init(configs.LC_APPID, configs.LC_APPKEY)
        friendpage_link = configs.FRIENPAGE_LINK
    else:
        leancloud.init(sys.argv[1], sys.argv[2])
        friendpage_link = sys.argv[3]

    # Load the yml configuration.
    # config = load_config()
    config = configs.yml

    # Start of the main job.
    banner('-----------!!开始执行爬取文章任务!!----------')
    print('\n')
    # Moved to handlers.coreDatas.py:
    # today = datetime.datetime.today()
    # time_limit = 60
    friend_poor = []
    post_poor = []

    banner('-----------!!开始执行友链获取任务!!----------')
    gitee_links = config['setting']['gitee_friends_links']
    if gitee_links['enable'] and gitee_links['type'] == 'normal':
        try:
            kang_api(friend_poor)
        except Exception:
            # Best effort: a broken source must not stop the other sources.
            # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
            print('读取gitee友链失败')
    else:
        print('未开启gitee友链获取')
    github_links = config['setting']['github_friends_links']
    if github_links['enable'] and github_links['type'] == 'normal':
        try:
            github_issuse(friend_poor)
        except Exception:
            print('读取github友链失败')
    else:
        print('未开启gihub友链获取')

    # Every theme parser is tried against the friend page. A parser that
    # raises is reported as "not this theme" and skipped.
    try:
        butterfly.butterfly_get_friendlink(friendpage_link, friend_poor)
    except Exception:
        print('不是butterfly主题')
    try:
        matery.matery_get_friendlink(friendpage_link, friend_poor)
    except Exception:
        print('不是matery主题')
    try:
        volantis.volantis_get_friendlink(friendpage_link, friend_poor)
    except Exception:
        print('不是volantis主题或未配置gitee友链')

    friend_poor = delete_same_link(friend_poor)
    friend_poor = block_link(friend_poor)
    print('当前友链数量', len(friend_poor))
    banner('-----------!!结束友链获取任务!!----------')

    total_count = 0
    error_count = 0
    for item in friend_poor:
        error = 'false'
        try:
            total_count += 1
            # Each parser returns the string 'true' on failure; fall through
            # to the next one, and finally to the sitemap crawler.
            error = butterfly.get_last_post_from_butterfly(item, post_poor)
            if error == 'true':
                error = matery.get_last_post_from_matery(item, post_poor)
            if error == 'true':
                error = volantis.get_last_post_from_volantis(item, post_poor)
            if error == 'true':
                print("-----------获取主页信息失败,采取sitemap策略----------")
                error, post_poor = sitmap_get(item, post_poor)
        except Exception as e:
            print('\n')
            print(item, "运用主页及sitemap爬虫爬取失败!请检查")
            print('\n')
            print(e)
            error_count += 1
            # An exception means the crawl failed. Without this, a crash in
            # the first parser would leave the friend flagged as 'false'.
            error = 'true'
        item.append(error)

    print('\n')
    print('----------------------')
    print("一共进行%s次" % total_count)
    print("一共失败%s次" % error_count)
    print('----------------------')
    print('\n')

    banner('-----------!!执行用户信息上传!!----------')
    leancloud_push_userinfo(friend_poor)
    banner('-----------!!用户信息上传完毕!!----------')

    # Newest posts first before uploading.
    post_poor.sort(key=itemgetter('time'), reverse=True)
    banner('-----------!!执行文章信息上传!!----------')
    leancloud_push(post_poor)
    banner('-----------!!文章信息上传完毕!!----------')