def start_requests(self):
    """Yield one detail-page ``Request`` per link found in saved listing pages.

    Walks the ``ship/shipping`` directory below the current working
    directory, parses every file found there as HTML and, for each
    ``<a class="links2">`` inside a ``<table class="text2">``, yields a
    request for ``self.home_page + href`` with ``self.parse`` as the
    callback and the cookies returned by ``build_cookies(self)``.

    Anchors that have no ``href`` or no text node are skipped.

    Side effects:
        * ``self.source`` is set to the part of each file name that
          precedes the first ``' - '``.
        * A debug counter is printed once the walk is finished.
    """
    cookies = build_cookies(self)
    file_dir = os.getcwd()
    sub_dir = os.path.join('ship', 'shipping')
    x = 0

    for dir_path, _dir_names, file_names in os.walk(sub_dir):
        for fn in file_names:
            # NOTE(review): requests are produced lazily, so by the time a
            # callback runs ``self.source`` may already name a later file --
            # confirm that ``parse`` tolerates this.
            self.source = fn.split(u' - ')[0]

            # Use the directory os.walk is currently visiting so that files
            # in nested subdirectories resolve to their real location.
            file_path = os.path.join(file_dir, dir_path, fn)

            # Read the page inside a context manager so the handle is closed
            # before the generator is suspended at ``yield``.
            with open(file_path, u'r') as page:
                body = page.read()

            response = Response(file_path, body=body)
            # Bind the body as a default argument: a plain closure over
            # ``response`` would see whichever response the loop created last.
            # Presumably the selector calls ``body_as_unicode()`` -- confirm.
            response.body_as_unicode = lambda _body=response.body: _body

            hxs = HtmlXPathSelector(response)
            a_tags = hxs.select('//table[@class="text2"]//a[@class="links2"]')
            for a_tag in a_tags:
                hrefs = a_tag.select('@href').extract()
                titles = a_tag.select('text()').extract()
                if not hrefs or not titles:
                    # Nothing to follow, or an anchor without link text.
                    continue
                yield Request(self.home_page + hrefs[0], self.parse,
                              cookies=cookies)

    # NOTE(review): ``x`` is never incremented in this function, so this
    # always prints 0. The call form prints the same on Python 2 and 3.
    print(x)