Example #1
0
def read_city_codes_from_file():
    """Load the city code file into the module-level ``_city2code`` dict.

    Every non-blank line of ``city_code.csv`` is split on commas into
    exactly three fields. The first field is ignored, the second is used as
    the dict key (presumably the Chinese city name -- confirm against the
    data file) and the third, stripped of surrounding whitespace, is stored
    as the value.

    Blank or whitespace-only lines (for example a trailing newline at the
    end of the file) are skipped instead of aborting the whole load.

    Raises:
        ValueError: if a non-blank line does not split into exactly three
            comma-separated fields.
    """
    # The project path is concatenated as-is, so it is assumed to already
    # end with a path separator -- TODO confirm.
    city_code_file_path = "%s%s" % (get_project_path(), "city_code.csv")

    # NOTE(review): the file is opened in binary mode; under Python 3 each
    # line would be ``bytes`` and ``split(",")`` would raise TypeError. This
    # code appears to target Python 2 -- confirm before porting.
    with open(city_code_file_path, "rb") as in_file:
        for line in in_file:
            # A blank line yields a single field and would break the
            # three-way unpacking below, so skip it.
            if not line.strip():
                continue
            _, chinese, city_code = line.split(",")
            _city2code[chinese] = city_code.strip()
Example #2
0
def read_city_codes_from_file():
    """Load the city code file into the module-level ``_city2code`` dict.

    Every non-blank line of ``city_code.csv`` is split on commas into
    exactly three fields. The first field is ignored, the second is used as
    the dict key (presumably the Chinese city name -- confirm against the
    data file) and the third, stripped of surrounding whitespace, is stored
    as the value.

    Blank or whitespace-only lines (for example a trailing newline at the
    end of the file) are skipped instead of aborting the whole load.

    Raises:
        ValueError: if a non-blank line does not split into exactly three
            comma-separated fields.
    """
    # The project path is concatenated as-is, so it is assumed to already
    # end with a path separator -- TODO confirm.
    city_code_file_path = "%s%s" % (get_project_path(), "city_code.csv")

    # NOTE(review): the file is opened in binary mode; under Python 3 each
    # line would be ``bytes`` and ``split(",")`` would raise TypeError. This
    # code appears to target Python 2 -- confirm before porting.
    with open(city_code_file_path, "rb") as in_file:
        for line in in_file:
            # A blank line yields a single field and would break the
            # three-way unpacking below, so skip it.
            if not line.strip():
                continue
            _, chinese, city_code = line.split(",")
            _city2code[chinese] = city_code.strip()
Example #3
0
def _read_proxys():
    """Load the proxy list from the proxy file.

    The file at ``get_project_path() + PROXY_FILE_PATH`` is parsed as CSV.
    Empty rows are ignored; for every other row the first column is taken
    as the host and the second as the port.

    Returns:
        list: ``(host, port)`` tuples with ``host`` as ``str`` and ``port``
        as ``int``, in file order.
    """
    proxy_file = get_project_path() + PROXY_FILE_PATH

    # Read in the proxy list.
    with open(proxy_file, "rb") as in_file:
        rows = csv.reader(in_file, lineterminator="\n")
        # Build the result while the file is still open; the reader is lazy.
        return [(str(row[0]), int(row[1])) for row in rows if row]
Example #4
0
def _read_proxys():
    """Load the proxy list from the proxy file.

    The file at ``get_project_path() + PROXY_FILE_PATH`` is parsed as CSV.
    Empty rows are ignored; for every other row the first column is taken
    as the host and the second as the port.

    Returns:
        list: ``(host, port)`` tuples with ``host`` as ``str`` and ``port``
        as ``int``, in file order.
    """
    proxy_file = get_project_path() + PROXY_FILE_PATH

    # Read in the proxy list.
    with open(proxy_file, "rb") as in_file:
        rows = csv.reader(in_file, lineterminator="\n")
        # Build the result while the file is still open; the reader is lazy.
        return [(str(row[0]), int(row[1])) for row in rows if row]
Example #5
0
class CtripSpider(BaseSpider):
    """Spider used to fetch Ctrip hotel information.

    The class only declares three tables (``parsers``, ``pipelines`` and
    ``start_tasks``); how they are consumed is defined by ``BaseSpider``,
    which is not visible here.
    """

    # Parser classes keyed by name. The start task below refers to one of
    # these keys ("CityParser") through its ``callback`` argument.
    parsers = {
        "CityParser": CityParser,
        "HotelParser": HotelParser,
        "HotelListParser": HotelListParser,
    }

    # Pipeline classes keyed by a string name; presumably the name of the
    # item type each pipeline handles -- confirm against BaseSpider.
    pipelines = {
        "CityItem": CityItemPipeline,
        "HotelCodeItem": HotelCodeItemPipeline,
        "HotelInfoItem": HotelInfoItemPipeline,
        "RoomInfoItem": RoomInfoItemPipeline,
        "ImageItem": ImageItemPipeline,
    }

    # Initial task list: a single FileTask for ctrip_city.xml, whose path is
    # built by prefixing the project path (assumed to end with a path
    # separator -- TODO confirm).
    start_tasks = [
        FileTask("%sctrip_city.xml" % get_project_path(),
                 callback="CityParser",
                 max_fail_count=8),
    ]