def shop_routine(self, target_url, **kwargs):
    """Fetch a shop page, parse it, store the result and archive the raw page.

    The work is done in four stages inside one ``MySession``. Every stage
    tags the exception it raises with an ``error_code`` attribute and lets it
    propagate, so the caller can tell which stage failed.

    Args:
        self: Not used by this function.
        target_url: URL of the page to fetch.
        **kwargs: Must contain ``mongo_task_id``, which is forwarded to
            ``save_task_and_page_content``.

    Raises:
        Exception: Whatever the failing stage raised, with ``error_code`` set
            to ``PROXY_INVALID`` (fetch), ``PARSE_ERROR`` (parse),
            ``STORAGE_ERROR`` (database insert) or ``100`` (page archiving,
            including a missing ``mongo_task_id``).
    """
    with MySession() as session:
        # Stage 1: download the page.
        try:
            page = session.get(target_url)
            page.encoding = 'utf8'
        except Exception as exc:
            exc.error_code = proj.my_lib.parser_exception.PROXY_INVALID
            # A bare ``raise`` re-raises the active exception with its
            # original traceback; ``raise exc`` would reset it on Python 2.
            raise

        # Stage 2: parse the raw response body.
        try:
            result = shop_parse(page.content, target_url)
        except Exception as exc:
            exc.error_code = proj.my_lib.parser_exception.PARSE_ERROR
            raise

        # Stage 3: persist the parsed result and echo the insert outcome.
        try:
            print(shop_insert_db(result, 'NULL'))
        except Exception as exc:
            exc.error_code = proj.my_lib.parser_exception.STORAGE_ERROR
            raise

        # Stage 4: archive the raw page together with the task metadata.
        try:
            save_task_and_page_content(task_name='daodao_poi_shop',
                                       content=page.content,
                                       task_id=kwargs['mongo_task_id'],
                                       source='daodao',
                                       source_id='NULL',
                                       city_id='NULL',
                                       url=target_url)
        except Exception as exc:
            # NOTE(review): 100 has no named constant in view, unlike the
            # other stages -- confirm its meaning against parser_exception.
            exc.error_code = 100
            raise
Example #2
0
def get_lost_attr(self, target_url, city_id, **kwargs):
    """Fetch a page, parse it with ``attr_parser``, store and archive it.

    Args:
        self: Not used by this function.
        target_url: URL of the page to fetch (15 second timeout).
        city_id: Passed to ``attr_insert_db`` together with the parsed result.
        **kwargs: Must contain ``mongo_task_id``, which is forwarded to
            ``save_task_and_page_content``.

    Returns:
        The value returned by ``attr_parser``.

    Raises:
        Exception: If ``attr_parser`` returns the string ``'Error'``.
    """
    with MySession() as session:
        page = session.get(target_url, timeout=15)
        page.encoding = 'utf8'
        result = attr_parser(page.content, target_url)

        # The parser signals failure with the sentinel string 'Error'.
        if result == 'Error':
            # Call form of ``raise`` is valid on both Python 2 and Python 3.
            raise Exception('parse %s Error' % target_url)

        print(attr_insert_db(result, city_id))
        # NOTE(review): the page is archived with city_id='NULL' even though
        # a city_id argument is available -- confirm this is intended.
        save_task_and_page_content(task_name='daodao_poi_attr', content=page.content,
                                   task_id=kwargs['mongo_task_id'],
                                   source='daodao',
                                   source_id='NULL',
                                   city_id='NULL', url=target_url)

        return result
Example #3
0
def get_lost_rest(self, target_url, city_id, **kwargs):
    """Fetch a page, archive it, parse it with ``rest_parser`` and store it.

    The raw page is archived before the parsed result is validated, so pages
    that fail validation are still saved.

    Args:
        self: Not used by this function.
        target_url: URL of the page to fetch (15 second timeout).
        city_id: Passed to both ``rest_parser`` and ``rest_insert_db``.
        **kwargs: Must contain ``mongo_task_id``, which is forwarded to
            ``save_task_and_page_content``.

    Returns:
        The value returned by ``rest_parser``.

    Raises:
        Exception: If ``rest_parser`` returns the string ``'Error'``, or if
            the parsed result has an empty ``'imgurl'`` entry.
    """
    with MySession() as session:
        page = session.get(target_url, timeout=15)
        page.encoding = 'utf8'
        result = rest_parser(page.content, target_url, city_id=city_id)
        # NOTE(review): task_name is 'daodao_poi_attr' although this routine
        # uses rest_parser -- possibly copied from get_lost_attr; confirm
        # before changing, since stored task names may depend on it.
        save_task_and_page_content(task_name='daodao_poi_attr', content=page.content,
                                   task_id=kwargs['mongo_task_id'],
                                   source='daodao',
                                   source_id='NULL',
                                   city_id='NULL', url=target_url)

        # The parser signals failure with the sentinel string 'Error'.
        if result == 'Error':
            # Call form of ``raise`` is valid on both Python 2 and Python 3.
            raise Exception('parse %s Error' % target_url)

        # A result without an image URL is rejected before it is stored.
        if not result['imgurl']:
            raise Exception('zhao bu dao tupian')
        logger.info('-------3-----------     ' + str(result))
        rest_insert_db(result, city_id)
        return result
Example #4
0
def hotel_routine_base_data(self, source, url, other_info, **kwargs):
    """Fetch a hotel detail page, parse it, store the result and archive it.

    Progress is reported through ``self.error_code``: it starts at 0 and is
    set to a stage-specific code just before the failing stage's exception is
    logged and re-raised.

    Args:
        self: Task object; ``task_source``, ``task_type`` and ``error_code``
            are written on it.
        source: Source name. For ``'hotels'`` the URL is rebuilt from the
            ``hotel-id`` query value found in ``url``.
        url: Page URL to fetch.
        other_info: Mapping passed to ``parse_hotel``; its ``'source_id'``
            and ``'city_id'`` entries are used when archiving the page.
        **kwargs: Must contain ``mongo_task_id``, which is forwarded to
            ``save_task_and_page_content``.

    Returns:
        None.

    Raises:
        Exception: Whatever the failing stage raised. ``self.error_code`` is
            12 (URL initialisation), 22 (page fetch), 102 (``TypeCheckError``
            while parsing), 27 (any other parse error), 33 (database store)
            or 104 (page archiving).
    """
    self.task_source = source.title()
    self.task_type = 'Hotel'

    self.error_code = 0

    # Task initialisation: normalise the URL for sources that need it.
    try:
        if source == 'hotels':
            # Raw string so that ``\d`` is a regex escape, not a string escape.
            hotel_id = re.findall(r'hotel-id=(\d+)', url)[0]
            url = 'http://zh.hotels.com/hotel/details.html?hotel-id=' + hotel_id
    except Exception as e:
        self.error_code = 12
        logger.exception(e)
        # A bare ``raise`` keeps the original traceback on Python 2.
        raise

    # Request-parameter adjustment step: nothing to do at present. The
    # original guarded an empty body here, so its error code (101) could
    # never be set.

    # Fetch the page. The session is used as a context manager so it is
    # released even when the request fails.
    try:
        with MySession() as session:
            page = session.get(url, timeout=240)
            page.encoding = 'utf8'
            content = page.text
    except Exception as e:
        self.error_code = 22
        logger.exception(e)
        raise

    # Parse the page; type-check failures get their own error code.
    try:
        result = parse_hotel(content=content,
                             url=url,
                             other_info=other_info,
                             source=source,
                             part="NULL")
    except TypeCheckError as e:
        self.error_code = 102
        logger.exception(e)
        raise
    except Exception as e:
        self.error_code = 27
        logger.exception(e)
        raise

    # Store the parsed result; always close the DB session, even when
    # merge() or commit() fails.
    try:
        db_session = DBSession()
        try:
            db_session.merge(result)
            db_session.commit()
        finally:
            db_session.close()
    except Exception as e:
        self.error_code = 33
        logger.exception(e)
        raise

    try:
        # Archive the page content once the crawl has succeeded.
        save_task_and_page_content(
            task_name='hotelinfo_routine_{0}'.format(source),
            content=content,
            task_id=kwargs['mongo_task_id'],
            source=source,
            source_id=other_info['source_id'],
            city_id=other_info['city_id'],
            url=url)
    except Exception as e:
        self.error_code = 104
        logger.exception(e)
        raise