def read_city_codes_from_file():
    """Read the city-code CSV file into the module-level ``_city2code`` dict.

    The file ``city_code.csv`` is looked up directly under the prefix
    returned by ``get_project_path()``. Every non-blank line is split on
    commas into exactly three fields: the first is ignored, the second is
    used as the dictionary key and the third (with surrounding whitespace
    stripped) is stored as the value.

    Blank or whitespace-only lines are skipped instead of aborting the load.

    Raises:
        ValueError: if a non-blank line does not split into exactly three
            comma-separated fields.
    """
    city_code_file_path = "%s%s" % (get_project_path(), "city_code.csv")
    # NOTE(review): the file is read in binary mode and split with a str
    # separator; that combination raises TypeError on Python 3, so this
    # looks like Python 2 code -- confirm the target before porting.
    with open(city_code_file_path, "rb") as in_file:
        for line in in_file:
            # A stray empty line (e.g. at the end of the file) carries no
            # record; unpacking it below would raise ValueError.
            if not line.strip():
                continue
            _, chinese, city_code = line.split(",")
            # The third field still carries the line terminator; strip it.
            _city2code[chinese] = city_code.strip()
def _read_proxys():
    """Load the proxy list from the proxy CSV file.

    The file is located by appending ``PROXY_FILE_PATH`` to the prefix
    returned by ``get_project_path()``. Empty CSV rows are ignored; for every
    other row the first column is taken as the host and the second as the
    port.

    Returns:
        proxy_list: list of ``(host, port)`` tuples, with ``host`` converted
            via ``str`` and ``port`` via ``int``.
    """
    proxy_file_path = get_project_path() + PROXY_FILE_PATH
    # Read in the proxy list.
    with open(proxy_file_path, "rb") as proxy_file:
        rows = csv.reader(proxy_file, lineterminator="\n")
        # An empty list is falsy, so blank rows are filtered out here.
        return [(str(row[0]), int(row[1])) for row in rows if row]
class CtripSpider(BaseSpider):
    """Spider used to fetch Ctrip hotel information."""

    # Parser classes registered under their names. How the base class
    # consumes this mapping is not visible here -- presumably a task's
    # ``callback`` string is looked up in it; confirm against BaseSpider.
    parsers = dict(
        CityParser=CityParser,
        HotelParser=HotelParser,
        HotelListParser=HotelListParser,
    )

    # Pipeline classes registered under item type names.
    pipelines = dict(
        CityItem=CityItemPipeline,
        HotelCodeItem=HotelCodeItemPipeline,
        HotelInfoItem=HotelInfoItemPipeline,
        RoomInfoItem=RoomInfoItemPipeline,
        ImageItem=ImageItemPipeline,
    )

    # Initial work: a single file task over ``ctrip_city.xml`` under the
    # project path, handed to "CityParser" and created with
    # ``max_fail_count=8``.
    start_tasks = [
        FileTask(
            "%sctrip_city.xml" % get_project_path(),
            callback="CityParser",
            max_fail_count=8,
        ),
    ]