def parse_kb(self, response):
    """Yield a firmware item for every firmware link in a KB article.

    Different firmware versions are not clearly separated in the download
    section, so each ``<p>`` under ``support-article-downloads`` is split
    on ``<br><br>`` and every fragment is re-parsed as its own small HTML
    document.  Paragraphs and fragments are walked in reverse order so
    that a MIB link is seen before the firmware items it is attached to;
    the most recently seen MIB link is added to each later item.

    Args:
        response: KB article response.  ``response.meta["product"]`` is
            copied into every item.

    Yields:
        The loaded item for each link whose href contains ``"firmware"``.
    """
    mib = None

    paragraphs = response.xpath(
        "//div[@id='support-article-downloads']/div/p")
    for entry in reversed(paragraphs):
        for segment in reversed(entry.extract().split("<br><br>")):
            resp = HtmlResponse(
                url=response.url, body=segment, encoding=response.encoding)

            hrefs = resp.xpath("//a/@href").extract()
            if not hrefs:
                continue

            # The text belongs to the segment, not to a single link, so it
            # is extracted once instead of once (or twice) per href.
            text = resp.xpath("//text()").extract()

            for href in hrefs:
                if "MIBs" in href:
                    mib = href
                elif "firmware" in href:
                    item = FirmwareLoader(
                        item=FirmwareImage(), response=resp,
                        date_fmt=["%m/%d/%Y"])
                    item.add_value("date", item.find_date(text))
                    # NOTE(review): this feeds every link of the segment
                    # into "url", not just `href` -- confirm that is the
                    # intent for segments containing several links.
                    item.add_xpath("url", "//a/@href")
                    item.add_value("mib", mib)
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.name)
                    item.add_value(
                        "version", FirmwareLoader.find_version_period(text))
                    yield item.load_item()
def parse_kb(self, response):
    """Yield a firmware item for every firmware link in a KB article.

    Different firmware versions are not clearly separated in the download
    section, so each ``<p>`` under ``support-article-downloads`` is split
    on ``<br><br>`` and every fragment is re-parsed as its own small HTML
    document.  Paragraphs and fragments are walked in reverse order so
    that a MIB link is seen before the firmware items it is attached to;
    the most recently seen MIB link is added to each later item.

    Args:
        response: KB article response.  ``response.meta["product"]`` is
            copied into every item.

    Yields:
        The loaded item for each link whose href contains ``"firmware"``.
    """
    mib = None

    paragraphs = response.xpath(
        "//div[@id='support-article-downloads']/div/p")
    for entry in reversed(paragraphs):
        for segment in reversed(entry.extract().split("<br><br>")):
            resp = HtmlResponse(
                url=response.url, body=segment, encoding=response.encoding)

            hrefs = resp.xpath("//a/@href").extract()
            if not hrefs:
                continue

            # The text belongs to the segment, not to a single link, so it
            # is extracted once instead of once (or twice) per href.
            text = resp.xpath("//text()").extract()

            for href in hrefs:
                if "MIBs" in href:
                    mib = href
                elif "firmware" in href:
                    item = FirmwareLoader(
                        item=FirmwareImage(), response=resp,
                        date_fmt=["%m/%d/%Y"])
                    item.add_value("date", item.find_date(text))
                    # NOTE(review): this feeds every link of the segment
                    # into "url", not just `href` -- confirm that is the
                    # intent for segments containing several links.
                    item.add_xpath("url", "//a/@href")
                    item.add_value("mib", mib)
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.name)
                    item.add_value(
                        "version", FirmwareLoader.find_version_period(text))
                    yield item.load_item()
def parse_product(self, response):
    """Yield the firmware item for a single product download page.

    The download link is taken from the first ``downLoadHref`` anchor and
    appended to ``self.firmware_url``.  Date, description and product are
    taken from ``response.meta``.

    Raises:
        IndexError: on iteration, when the page has no ``downLoadHref``
            anchor with an ``href``.
    """
    base_url = self.firmware_url
    links = response.xpath('//a[@id="downLoadHref"]/@href').extract()
    download_url = base_url + links[0]

    loader = FirmwareLoader(item=FirmwareImage(), response=response)
    meta = response.meta
    # NOTE(review): add_xpath evaluates meta['date'] as an XPath
    # expression, whereas the other meta fields go through add_value --
    # confirm the caller really passes an XPath here and not a date string.
    loader.add_xpath("date", meta['date'])
    loader.add_value("description", meta['description'])
    loader.add_value("url", download_url)
    loader.add_value("product", meta["product"])
    loader.add_value("vendor", self.name)

    yield loader.load_item()
def parse_product(self, response):
    """Yield the firmware item for a single product download page.

    The download link is taken from the first ``downLoadHref`` anchor and
    appended to ``self.firmware_url``.  Date, description and product are
    taken from ``response.meta``.

    Raises:
        IndexError: on iteration, when the page has no ``downLoadHref``
            anchor with an ``href``.
    """
    base_url = self.firmware_url
    links = response.xpath('//a[@id="downLoadHref"]/@href').extract()
    download_url = base_url + links[0]

    loader = FirmwareLoader(item=FirmwareImage(), response=response)
    meta = response.meta
    # NOTE(review): add_xpath evaluates meta['date'] as an XPath
    # expression, whereas the other meta fields go through add_value --
    # confirm the caller really passes an XPath here and not a date string.
    loader.add_xpath("date", meta['date'])
    loader.add_value("description", meta['description'])
    loader.add_value("url", download_url)
    loader.add_value("product", meta["product"])
    loader.add_value("vendor", self.name)

    yield loader.load_item()
def parse_product(self, response):
    """Yield a firmware item for each download row that mentions firmware.

    Every table row after the first inside ``div#accordion-2`` is
    inspected.  A row is kept only when the text of the first link in its
    second cell contains "firmware" (case-insensitive).  That same text is
    used as the description and as the input for version detection.

    Args:
        response: product page response.  ``response.meta["product"]`` is
            copied into every item.
    """
    rows = response.xpath("//div[@id='accordion-2']//tr[position() > 1]")
    for row in rows:
        link_text = row.xpath("./td[2]//a[1]/text()").extract()

        # Skip rows that are not firmware downloads.
        if "firmware" not in "".join(link_text).lower():
            continue

        loader = FirmwareLoader(
            item=FirmwareImage(),
            response=response,
            selector=row,
            date_fmt=["%Y-%m-%d"],
        )
        # These XPaths are relative to `row`, which is the loader's selector.
        loader.add_xpath("date", "td[1]//text()")
        loader.add_value("description", link_text)
        loader.add_xpath("url", "td[2]//a[1]/@href")
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        loader.add_value(
            "version", FirmwareLoader.find_version_period(link_text))
        yield loader.load_item()
def parse(self, response):
    """Yield firmware items from the two listing spans of the page.

    Both spans under ``div#main_right`` are read as runs of ``<p>``
    elements grouped into fixed-size records: the first span is stepped
    through 7 paragraphs at a time and its product names are split on
    CRLF, the second 5 at a time and split on commas.  For a record
    starting at offset ``i``, paragraph ``i + 2`` supplies the product
    names, paragraph ``i + 3`` the version and the record's last paragraph
    (``i + stride``) the download link.  One item is yielded per product
    name; all items of the first span come before those of the second.

    Raises:
        IndexError: on iteration, when a record's product paragraph has
            no text node.
    """
    # (span index, paragraphs per record, product-name separator)
    layouts = ((1, 7, "\r\n"), (2, 5, ","))

    for span, stride, separator in layouts:
        span_xpath = "//div[@id='main_right']/span[%d]" % span

        # NOTE(review): records are counted over direct <p> children but
        # looked up below with the descendant axis (//p[n]) -- confirm the
        # two agree for the real page layout.
        paragraph_count = len(response.xpath(span_xpath + "/p"))

        for i in range(0, paragraph_count, stride):
            # The XPaths depend only on the record, so build them once
            # rather than once per product.
            products_xpath = span_xpath + "//p[%d]/text()" % (i + 2)
            version_xpath = span_xpath + "//p[%d]/text()" % (i + 3)
            url_xpath = span_xpath + "//p[%d]/a/@href" % (i + stride)

            prods = response.xpath(products_xpath).extract()[0]
            for product in prods.split(separator):
                item = FirmwareLoader(
                    item=FirmwareImage(), response=response)
                item.add_xpath("version", version_xpath)
                item.add_xpath("url", url_xpath)
                item.add_value("product", product)
                item.add_value("vendor", self.name)
                yield item.load_item()
def parse(self, response):
    """Yield firmware items from the two listing spans of the page.

    Both spans under ``div#main_right`` are read as runs of ``<p>``
    elements grouped into fixed-size records: the first span is stepped
    through 7 paragraphs at a time and its product names are split on
    CRLF, the second 5 at a time and split on commas.  For a record
    starting at offset ``i``, paragraph ``i + 2`` supplies the product
    names, paragraph ``i + 3`` the version and the record's last paragraph
    (``i + stride``) the download link.  One item is yielded per product
    name; all items of the first span come before those of the second.

    Raises:
        IndexError: on iteration, when a record's product paragraph has
            no text node.
    """
    # (span index, paragraphs per record, product-name separator)
    layouts = ((1, 7, "\r\n"), (2, 5, ","))

    for span, stride, separator in layouts:
        span_xpath = "//div[@id='main_right']/span[%d]" % span

        # NOTE(review): records are counted over direct <p> children but
        # looked up below with the descendant axis (//p[n]) -- confirm the
        # two agree for the real page layout.
        paragraph_count = len(response.xpath(span_xpath + "/p"))

        for i in range(0, paragraph_count, stride):
            # The XPaths depend only on the record, so build them once
            # rather than once per product.
            products_xpath = span_xpath + "//p[%d]/text()" % (i + 2)
            version_xpath = span_xpath + "//p[%d]/text()" % (i + 3)
            url_xpath = span_xpath + "//p[%d]/a/@href" % (i + stride)

            prods = response.xpath(products_xpath).extract()[0]
            for product in prods.split(separator):
                item = FirmwareLoader(
                    item=FirmwareImage(), response=response)
                item.add_xpath("version", version_xpath)
                item.add_xpath("url", url_xpath)
                item.add_value("product", product)
                item.add_value("vendor", self.name)
                yield item.load_item()