Example #1
def official_method():
    crawler = FilmBaseInfoCrawler()
    getor = get_url.GetUrl()
    film_id_list = crawler.read_film_id()
    cfo = config_operator.ConfigOperator()
    offset = int(cfo.get_maoyan_film('baseinfo_offset'))
    interval = int(cfo.get_maoyan_film('baseinfo_interval'))
    for num in range(offset, len(film_id_list)):
        film_id = int(film_id_list[num][0])
        url = crawler.base_url + str(film_id) + '#award'
        try:
            response = getor.get_response(url)
            dict_base_info = crawler.get_base_info(response.text, film_id)
            dict_cast_staff = crawler.get_staff_info(response.text)
            dict_awards = crawler.get_award_info(response.text)
            crawler.write_db(dict_base_info, dict_cast_staff, dict_awards, film_id)
            cfo.write_maoyan_film('baseinfo_offset', str(num))
        except Exception as e:
            while True:
                try:
                    print('Error occurred, retrying in 30s\n' + str(e))
                    getor.change_account()
                    time.sleep(30)
                    response = getor.get_response(url)
                    dict_base_info = crawler.get_base_info(response.text, film_id)
                    dict_cast_staff = crawler.get_staff_info(response.text)
                    dict_awards = crawler.get_award_info(response.text)
                    crawler.write_db(dict_base_info, dict_cast_staff, dict_awards, film_id)
                    cfo.write_maoyan_film('baseinfo_offset', str(num))
                    break
                except Exception:
                    pass

        time.sleep(interval)
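Every example in this section repeats the same fetch → parse → write-to-DB → checkpoint-offset loop, with an account-rotating retry block duplicated inside the except branch. As a minimal sketch (not part of the original project), that retry could be factored into a helper like the one below; the helper name, its signature, and the wrapped callable are assumptions, while the change_account() call and the 30-second wait are taken from the examples.

import time

def retry_with_account_rotation(action, getor, wait_seconds=30):
    # Keep calling `action` until it succeeds, rotating the crawler account
    # and waiting between attempts, mirroring the inline `while True` retry
    # blocks in the examples above and below.
    while True:
        try:
            return action()
        except Exception as e:
            print('Error occurred, retrying in %ds\n%s' % (wait_seconds, e))
            getor.change_account()
            time.sleep(wait_seconds)

With such a helper, the body of each loop iteration could be written once and wrapped, instead of being repeated inside the except branch.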
Example #2
def official_method():
    crawler = DoubanFilmInfoCrawler()
    getor = get_url.GetUrl()
    film_id_list = crawler.read_db()
    cfo = config_operator.ConfigOperator()
    offset = int(cfo.get_douban_film('filminfo_offset'))
    interval = int(cfo.get_douban_film('filminfo_interval'))
    for num in range(offset, len(film_id_list)):
        film_id = int(film_id_list[num][0])
        url = crawler.base_url.replace('-', str(film_id))
        try:
            response = getor.get_response(url)
            dict_film_info = crawler.get_film_info(response.text, film_id)
            crawler.write_db(dict_film_info, film_id)
            cfo.write_maoyan_film('filminfo_offset', str(num))
        except Exception as e:
            while True:
                try:
                    print('Exception occurred, retrying in 30s\n' + str(e))
                    getor.change_account()
                    time.sleep(30)
                    response = getor.get_response(url)
                    dict_film_info = crawler.get_film_info(
                        response.text, film_id)
                    crawler.write_db(dict_film_info, film_id)
                    cfo.write_maoyan_film('filminfo_offset', str(num))
                    break
                except Exception:
                    pass

        time.sleep(interval)
Example #3
def official_method():
    crawler = FilmRatingCrawler()
    getor = get_url.GetUrl()
    film_id_list = crawler.read_film_id()
    cfo = config_operator.ConfigOperator()
    offset = int(cfo.get_maoyan_film('rating_offset'))
    interval = int(cfo.get_maoyan_film('rating_interval'))
    for num in range(offset, len(film_id_list)):
        film_id = int(film_id_list[num][0])
        # Get rating info
        try:
            rating_url = crawler.rating_url.replace('-', str(film_id))
            referer = 'https://piaofang.maoyan.com/movie/%s/promotion/trailers' % film_id
            response = getor.get_response(rating_url, referer=referer)
            # dict_data = crawler.get_datadict_fromscript(response.text)
            dict_rating_info = crawler.get_ratings(response.text)
            # Get the "want to see" count
            wanted_url = crawler.wanted_url.replace('-', str(film_id))
            response_wanted = getor.get_response(wanted_url, referer=referer)
            # print(html_doc_wanted)
            dict_data_wanted = crawler.get_datadict_fromscript(
                response_wanted.text)
            want_num = crawler.get_wanted(dict_data_wanted)
            crawler.write_db(dict_rating_info, want_num, film_id)
            cfo.write_maoyan_film('rating_offset', str(num))
        except Exception as e:
            while True:
                try:
                    print('Problem occurred, retrying in 30s\n' + str(e))
                    getor.change_account()
                    time.sleep(30)
                    rating_url = crawler.rating_url.replace('-', str(film_id))
                    referer = 'https://piaofang.maoyan.com/movie/%s/promotion/trailers' % film_id
                    response = getor.get_response(rating_url, referer=referer)
                    # dict_data = crawler.get_datadict_fromscript(response.text)
                    dict_rating_info = crawler.get_ratings(response.text)
                    # Get the "want to see" count
                    wanted_url = crawler.wanted_url.replace('-', str(film_id))
                    response_wanted = getor.get_response(wanted_url,
                                                         referer=referer)
                    # print(html_doc_wanted)
                    dict_data_wanted = crawler.get_datadict_fromscript(
                        response_wanted.text)
                    want_num = crawler.get_wanted(dict_data_wanted)
                    crawler.write_db(dict_rating_info, want_num, film_id)
                    cfo.write_maoyan_film('rating_offset', str(num))
                    break
                except Exception:
                    continue

        time.sleep(interval)
Example #4
def official_method():
    crawler = FilmBoxOfficeCrawler()
    getor = get_url.GetUrl()
    film_id_list = crawler.read_film_id()
    #print(film_id_list)
    cfo = config_operator.ConfigOperator()
    offset = int(cfo.get_maoyan_film('boxoffice_offset'))
    interval = int(cfo.get_maoyan_film('boxoffice_interval'))
    for num in range(offset, len(film_id_list)):
        try:
            film_id = int(film_id_list[num][0])
            url = crawler.base_url.replace('-', str(film_id))
            response = getor.get_response(url)
            # print(html_doc)
            dict_data = crawler.get_datadict_fromscript(response.text)
            if crawler.check_data(dict_data):
                print(str(film_id) + ' has no data')
                cfo.write_maoyan_film('boxoffice_offset', str(num))
                time.sleep(interval)
                continue
            dict_summary_info = crawler.get_summary_boxoffice(dict_data)
            dict_day_boxoffice = crawler.get_day_boxoffice(dict_data)
            crawler.write_db(dict_summary_info, dict_day_boxoffice, film_id)
            cfo.write_maoyan_film('boxoffice_offset', str(num))
        except Exception as e:
            while True:
                try:
                    print('Error occurred, retrying in 30s\n' + str(e))
                    getor.change_account()
                    time.sleep(30)
                    response = getor.get_response(url)
                    # print(html_doc)
                    dict_data = crawler.get_datadict_fromscript(response.text)
                    if crawler.check_data(dict_data):
                        print(str(film_id) + ' has no data')
                        cfo.write_maoyan_film('boxoffice_offset', str(num))
                        # No data for this film: leave the retry loop; the
                        # outer loop sleeps before moving on
                        break
                    dict_summary_info = crawler.get_summary_boxoffice(
                        dict_data)
                    dict_day_boxoffice = crawler.get_day_boxoffice(dict_data)
                    crawler.write_db(dict_summary_info, dict_day_boxoffice,
                                     film_id)
                    cfo.write_maoyan_film('boxoffice_offset', str(num))
                    break  # success: leave the retry loop
                except Exception:
                    pass

        time.sleep(interval)
Example #5
def official_method():
    crawler = BaiduIndexActorAgeGenderCrawler()
    getor = get_url.GetUrl()
    actor_name_list = crawler.read_db_name()
    cfo = config_operator.ConfigOperator()
    offset = int(cfo.get_baidu_celebrity('gender_offset'))
    interval = int(cfo.get_baidu_celebrity('gender_interval'))
    for num in range(offset, len(actor_name_list)):
        name = actor_name_list[num][1]
        celebrity_id = actor_name_list[num][0]
        url = crawler.base_url + name
        try:
            response = getor.get_response(url)
            # print(response.text)
            if crawler.check_response_available(response.text):
                print('%s has no data' % celebrity_id)
                continue
            dict_result = crawler.get_info(response.text)
            crawler.write_db(dict_result, celebrity_id)
            cfo.write_baidu_celebrity('gender_offset', str(num))
        except Exception as e:
            while True:
                try:
                    print('Error occurred, retrying in 30s\n' + str(e))
                    time.sleep(30)
                    getor.change_account()
                    response = getor.get_response(url)
                    if crawler.check_response_available(response.text):
                        print('%s has no data' % celebrity_id)
                        break
                    dict_result = crawler.get_info(response.text)
                    crawler.write_db(dict_result, celebrity_id)
                    cfo.write_baidu_celebrity('gender_offset', str(num))
                    break
                except Exception:
                    pass

        time.sleep(interval)
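For completeness, this is how the loop body of Example #5 could be combined with the retry helper sketched after Example #1; the crawler, getor, and cfo method calls come from the example above, while crawl_celebrity and the lambda wrapper are illustrative assumptions, not part of the original project.

def crawl_celebrity(crawler, getor, cfo, url, celebrity_id, num):
    # One iteration of the Example #5 loop body, expressed as a single callable.
    response = getor.get_response(url)
    if crawler.check_response_available(response.text):
        print('%s has no data' % celebrity_id)
        return
    dict_result = crawler.get_info(response.text)
    crawler.write_db(dict_result, celebrity_id)
    cfo.write_baidu_celebrity('gender_offset', str(num))

# Inside the for loop, in place of the nested try/except blocks:
# retry_with_account_rotation(
#     lambda: crawl_celebrity(crawler, getor, cfo, url, celebrity_id, num), getor)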