def parse_product(self, response):
    """Yield one firmware item per download link in the ``mainbox`` list.

    The link text is split on single spaces: the first token is taken as
    the product and the last token as the version.  When the first token
    matches the pattern below, only the leading model name is kept, so a
    trailing hardware revision is dropped from the product.

    Example titles:
        FH456V1.0 Firmware V10.1.1.1_EN
        E101(V2.0) Firmware V1.10.0.1_EN
        G3(V2.0) Firmware V2.0.0.1_EN
        O3 Firmware V1.0.0.3_EN
        i6 Firmware V1.0.0.9(3857)_EN

    Raises:
        IndexError: if a link has no ``href`` attribute or no direct text.
    """
    import re

    # Model characters deliberately exclude "v"/"V" so the match stops in
    # front of the hardware revision; U+FF08 is the full-width "(".
    # Written as a plain unicode literal (not ``ur''``) so the source is
    # valid on both Python 2 and Python 3.
    pattern = re.compile(
        u'^(?P<product>([a-uw-zA-UW-Z0-9])+)[\\(\uff08]?(V\\d\\.0)?')

    for a in response.xpath("//div[@id='mainbox']//dd/a"):
        url = a.xpath("./@href").extract()[0]
        title = a.xpath("./text()").extract()[0]

        items = title.split(' ')
        version = items[-1]

        head = items[0]
        if isinstance(head, bytes):
            # Only byte strings need decoding; text is matched as-is.
            head = head.decode('utf-8', 'replace')
        match = pattern.search(head)
        # Fall back to the raw first token when the pattern does not match.
        product = match.group('product') if match else items[0]

        item = FirmwareLoader(item=FirmwareImage(), response=response)
        item.add_value("version", version)
        item.add_value("url", url)
        item.add_value("product", product)
        item.add_value("vendor", self.vendor)
        yield item.load_item()
def parse_product_firmware(self, response):
    """Yield one firmware item per download button on a product page.

    The product name comes from ``response.meta["product"]``.  The item
    date is the page's update date when an ``update_date`` list entry is
    found, otherwise the last create date seen (empty string if none).
    """
    badge_text = './/span[@class="badge"]/text()'
    product = response.meta["product"]

    # Scan the <li> elements until the update date is found.
    create_date = ''
    update_date = None
    found_update = False
    for li in response.xpath('//li'):
        css_class = li.xpath('@class')
        if css_class.re(r'(\[hide_empty:create_date\])'):
            create_date = li.xpath(badge_text).extract_first()
        elif css_class.re(r'(\[hide_empty:update_date\])'):
            update_date = li.xpath(badge_text).extract_first()
            found_update = True
            break
    if not found_update:
        update_date = create_date

    # Every download button in the file-list table becomes one item.
    downloads = response.xpath(
        '//table[@class="wpdm-filelist table table-hover"]')
    buttons = downloads.xpath('.//a[@class="inddl btn btn-primary btn-sm"]')
    for button in buttons:
        link = button.xpath("@href")
        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt="%B %d, %Y")
        loader.add_value("product", product)
        loader.add_value("vendor", self.name)
        loader.add_value("date", update_date)
        loader.add_value("url", link.extract_first())
        yield loader.load_item()
def parse_product(self, response):
    """Yield a single firmware item for the first thumbnail link.

    Version and product are taken from ``response.meta``.

    Raises:
        IndexError: if the page has no thumbnail link.
    """
    thumbnail_links = response.xpath(
        "//div[@class='thumbnail']//a/@href").extract()
    url = thumbnail_links[0]

    loader = FirmwareLoader(item=FirmwareImage(), response=response)
    loader.add_value("version", response.meta['version'])
    loader.add_value("url", url)
    loader.add_value("product", response.meta['product'])
    loader.add_value("vendor", self.vendor)
    yield loader.load_item()
def parse(self, response):
    """Yield one firmware item per link in the second table column.

    The link's ``title`` attribute is stored as the description and is
    also passed to ``self.parse_product`` to derive the product.
    """
    for anchor in response.xpath("//table//tr//td[2]//a"):
        label = anchor.xpath('./@title').extract()[0]
        target = anchor.xpath('./@href').extract()[0]

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        loader.add_value("url", target)
        loader.add_value("product", self.parse_product(label))
        loader.add_value("description", label)
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse(self, response):
    """Yield one firmware item for every URL in ``self.firmware``.

    The product is the part of the URL's final path segment that precedes
    the first underscore.  The response itself is not inspected.
    """
    for firmware_url in self.firmware:
        filename = firmware_url.split("/")[-1]
        loader = FirmwareLoader(item=FirmwareImage())
        loader.add_value("url", firmware_url)
        loader.add_value("product", filename.split("_")[0])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse(self, response):
    """Yield a firmware item for each link whose href contains "Firmware".

    Links are searched inside every paragraph and list of the main
    container; the text of the enclosing element is handed to
    ``FirmwareLoader.find_product`` to derive the product.
    """
    blocks = response.xpath(
        "//div[@class='main-container']//p|//div[@class='main-container']//ul"
    )
    for block in blocks:
        block_text = block.xpath(".//text()").extract()
        for link in block.xpath(".//a/@href").extract():
            if "Firmware" not in link:
                continue
            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", link)
            loader.add_value("product",
                             FirmwareLoader.find_product(block_text))
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse(self, response):
    """Emit firmware items for "Firmware" links in the main container.

    Each ``<p>`` and ``<ul>`` under the main container is scanned; the
    element's text nodes are passed to ``FirmwareLoader.find_product`` to
    name the product of every matching link inside it.
    """
    section_path = ("//div[@class='main-container']//p"
                    "|//div[@class='main-container']//ul")
    for section in response.xpath(section_path):
        words = section.xpath(".//text()").extract()
        firmware_links = [
            href for href in section.xpath(".//a/@href").extract()
            if "Firmware" in href
        ]
        for href in firmware_links:
            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", href)
            loader.add_value("product", FirmwareLoader.find_product(words))
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse(self, response):
    """Follow every anchor on the page, passing on a version guess.

    For each ``<a>`` that has both direct text and an ``href``, a request
    to the joined URL is scheduled with ``self.parse_url`` as callback.
    The version found in the link text by
    ``FirmwareLoader.find_version_period`` travels in ``meta["version"]``.

    Anchors lacking direct text or an ``href`` are skipped; previously
    they raised IndexError and aborted the whole callback.
    """
    for link in response.xpath("//a"):
        text = link.xpath("text()").extract_first()
        href = link.xpath("@href").extract_first()
        if text is None or href is None:
            # e.g. <a href="/"><em>(root)</em></a> has no direct text
            continue

        yield Request(
            url=urlparse.urljoin(response.url, href),
            headers={"Referer": response.url},
            meta={"version": FirmwareLoader.find_version_period(text)},
            callback=self.parse_url)
def parse(self, response):
    """Schedule a ``parse_url`` request for every anchor on the page.

    The anchor's direct text is given to
    ``FirmwareLoader.find_version_period`` and the result is passed along
    as ``meta["version"]``.  The root link, which has no direct text and
    points at "/", is ignored.
    """
    for anchor in response.xpath("//a"):
        label = anchor.xpath("text()").extract_first()
        target = anchor.xpath("@href").extract_first()

        # <a href="/"><em>(root)</em></a>
        is_root_link = label is None and target == "/"
        if is_root_link:
            continue

        version = FirmwareLoader.find_version_period(label)
        yield Request(url=urllib.parse.urljoin(response.url, target),
                      headers={"Referer": response.url},
                      meta={"version": version},
                      callback=self.parse_url)
def parse(self, response):
    """Walk a directory listing, yielding firmware files and sub-requests.

    Links ending in ``.bin`` or ``.upg`` become firmware items; any other
    link containing a slash is followed recursively with this same
    callback.  The ".." and "/" links are ignored.
    """
    for target in response.xpath("//a/@href").extract():
        if target in ("..", "/"):
            continue

        if target.endswith((".bin", ".upg")):
            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", target)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
        elif "/" in target:
            yield Request(
                url=urllib.parse.urljoin(response.url, target),
                headers={"Referer": response.url},
                callback=self.parse)
def parse(self, response):
    """Yield firmware items for content links tagged with an index.

    A link qualifies when one of its text nodes contains "---"; the
    integer before the first "-" in that node indexes ``self.firmware``,
    whose entry supplies the product (element 0) and version (element 1).
    Links with no such node, or with index 0, are skipped.
    """
    for anchor in response.xpath("//div[@class='content']//a"):
        fragments = anchor.xpath(".//text()").extract()
        href = anchor.xpath("./@href").extract()[0]

        # Only the first text node carrying the "---" marker is used.
        marker = next((frag for frag in fragments if "---" in frag), None)
        idx = int(marker.split("-")[0]) if marker is not None else None
        if not idx:
            continue

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        loader.add_value("url", href)
        loader.add_value("version", self.firmware[idx][1])
        loader.add_value("product", self.firmware[idx][0])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse(self, response):
    """Follow ``.html`` links of each product section.

    Sections without any link act as headings: their first text node
    (stripped) becomes the current product.  For sections with links, a
    ``parse_download`` request is scheduled per ``.html`` link, carrying
    the current product and the version found in the section text.
    """
    current_product = None
    sections = response.xpath(
        "//div[@class='product-content']/div[@class='product-box2']/div")
    for block in sections:
        words = block.xpath(".//text()").extract()

        if not block.xpath(".//a"):
            current_product = words[0].strip()
            continue

        for target in block.xpath(".//a/@href").extract():
            if not target.endswith(".html"):
                continue
            yield Request(
                url=urllib.parse.urljoin(response.url, target),
                meta={"product": current_product,
                      "version": FirmwareLoader.find_version(words)},
                headers={"Referer": response.url},
                callback=self.parse_download)
def parse(self, response):
    """Schedule ``parse_download`` for every ``.html`` link per product.

    A section with no anchors names the product for the sections that
    follow (first text node, stripped).  A section with anchors yields one
    request per ``.html`` link; the request meta holds that product and
    the result of ``FirmwareLoader.find_version`` on the section text.
    """
    heading = None
    box_path = "//div[@class='product-content']/div[@class='product-box2']/div"
    for box in response.xpath(box_path):
        box_text = box.xpath(".//text()").extract()
        has_links = bool(box.xpath(".//a"))

        if has_links:
            html_links = (href for href in box.xpath(".//a/@href").extract()
                          if href.endswith(".html"))
            for href in html_links:
                yield Request(
                    url=urlparse.urljoin(response.url, href),
                    meta={"product": heading,
                          "version": FirmwareLoader.find_version(box_text)},
                    headers={"Referer": response.url},
                    callback=self.parse_download)
        else:
            heading = box_text[0].strip()
def parse_product(self, response):
    """Yield firmware items from tables captioned "firmware update".

    For each link with text in the first column of such a table, the
    product is built from the third text node of the page's ``inside``
    block: upper-cased, split on whitespace, with one occurrence each of
    "RESOURCES", "FOR" and "SERIES" removed.
    """
    for table in response.xpath("//div[@class='view-content']//table"):
        caption = " ".join(table.xpath("./caption//text()").extract())
        if "firmware update" not in caption.lower():
            continue

        for anchor in table.xpath("./tbody/tr/td[1]/a"):
            if not anchor.xpath(".//text()"):
                continue

            href = anchor.xpath("./@href").extract()[0]
            label = anchor.xpath(".//text()").extract()[0]

            heading = response.xpath(
                "//div[@id='--2']/div[3]//div[@class='inside']//text()"
            ).extract()[2]
            words = heading.upper().split()
            for noise in ("RESOURCES", "FOR", "SERIES"):
                if noise in words:
                    words.remove(noise)

            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", href)
            loader.add_value("product", " ".join(words))
            loader.add_value("description", label)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse(self, response):
    """Crawl a file listing for ``.bin`` / ``.upg`` firmware images.

    Firmware links are emitted as items with only URL and vendor set.
    Every other link that contains a slash is requested and parsed by
    this method again.  ".." and "/" are never followed.
    """
    firmware_suffixes = (".bin", ".upg")
    for link in response.xpath("//a/@href").extract():
        if link == ".." or link == "/":
            continue

        if link.endswith(firmware_suffixes):
            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", link)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
            continue

        if "/" in link:
            yield Request(url=urlparse.urljoin(response.url, link),
                          headers={"Referer": response.url},
                          callback=self.parse)
def parse_product(self, response):
    """Emit a firmware item per linked row of "firmware update" tables.

    Only tables whose caption (case-insensitively) contains
    "firmware update" are considered, and only first-column links that
    have text.  The product name is the page heading (third text node of
    the ``inside`` block) in upper case without the words "RESOURCES",
    "FOR" and "SERIES" (first occurrence of each).
    """
    heading_path = "//div[@id='--2']/div[3]//div[@class='inside']//text()"
    dropped_words = ["RESOURCES", "FOR", "SERIES"]

    for table in response.xpath("//div[@class='view-content']//table"):
        caption_text = " ".join(
            table.xpath("./caption//text()").extract()).lower()
        if "firmware update" in caption_text:
            for link in table.xpath("./tbody/tr/td[1]/a"):
                link_text = link.xpath(".//text()")
                if not link_text:
                    continue
                href = link.xpath("./@href").extract()[0]
                description = link.xpath(".//text()").extract()[0]

                tokens = response.xpath(
                    heading_path).extract()[2].upper().split()
                for word in dropped_words:
                    if word in tokens:
                        tokens.remove(word)

                loader = FirmwareLoader(
                    item=FirmwareImage(), response=response)
                loader.add_value("url", href)
                loader.add_value("product", " ".join(tokens))
                loader.add_value("description", description)
                loader.add_value("vendor", self.name)
                yield loader.load_item()
def parse(self, response):
    """Yield firmware items for every ``.npk`` / ``.lzb`` link.

    Scheme-relative links ("//host/...") are prefixed with "http:".  The
    version is the second-to-last path segment of the link and the product
    is the file name up to its last "-".  The date is searched for in the
    text of the whole page.
    """
    for link in response.xpath("//a/@href").extract():
        if not link.endswith((".npk", ".lzb")):
            continue

        if link.startswith("//"):
            link = "http:" + link

        page_text = response.xpath("//text()").extract()
        segments = link.split('/')
        version = segments[-2]
        filename = segments[-1]

        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%Y-%b-%d"])
        loader.add_value("date", loader.find_date(page_text))
        loader.add_value("url", link)
        loader.add_value("product", filename[:filename.rfind("-")])
        loader.add_value("vendor", self.name)
        loader.add_value("version", version)
        yield loader.load_item()
def parse_download(self, response):
    """Yield firmware items from a JSON list of downloadable files.

    Only entries whose ``subFileType`` is "firmware" are used.  The
    ``releaseDate`` field is divided by 1000, converted with
    ``datetime.fromtimestamp`` and rendered with the loader's first
    ``date_fmt`` before being stored.
    """
    entries = json.loads(response.body_as_unicode())
    for entry in entries:
        if entry["subFileType"] != "firmware":
            continue

        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%d/%m/%y"])
        loader.add_value("version", entry["fileVersion"])

        released = datetime.datetime.fromtimestamp(
            int(entry["releaseDate"]) / 1000)
        date_format = loader.context.get("date_fmt")[0]
        loader.add_value("date", released.strftime(date_format))

        loader.add_value("description", entry["fileName"])
        loader.add_value("url", entry["downloadUrl"])
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.vendor)
        yield loader.load_item()
def parse_product(self, response):
    """Yield firmware items for every ``.npk`` / ``.lzb`` link.

    Date and version are both searched for in the text of the whole page;
    the product is the linked file name up to its last "-".
    """
    for link in response.xpath("//a/@href").extract():
        if not link.endswith((".npk", ".lzb")):
            continue

        page_text = response.xpath("//text()").extract()
        filename = link.split("/")[-1]
        product = filename[:filename.rfind("-")]

        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%Y-%b-%d"])
        loader.add_value("date", loader.find_date(page_text))
        loader.add_value("url", link)
        loader.add_value("product", product)
        loader.add_value("vendor", self.name)
        loader.add_value("version",
                         FirmwareLoader.find_version_period(page_text))
        yield loader.load_item()
def parse(self, response):
    """Expand the router selector, or collect firmware download links.

    When the page has a ``router`` drop-down, one request per option
    (except empty values and "allrouters") is scheduled back to this
    method.  Otherwise links are read from the search-result cell if
    present, else from the body content block, and those pointing at
    download.verizon.net with "firmware" in the URL become items.
    """
    if response.xpath("//select[@id='router']"):
        options = response.xpath(
            "//select[@id='router']/option/@value").extract()
        for product in options:
            if product and product != "allrouters":
                yield Request(
                    url=urlparse.urljoin(
                        response.url, "?router=%s" % (product)),
                    headers={"Referer": response.url},
                    callback=self.parse)
        return

    # Both result layouts are handled the same way; only the container
    # differs.
    if response.xpath("//td[@id='search_main_content']"):
        anchor_path = "//td[@id='search_main_content']//a"
    else:
        anchor_path = "//div[@id='ghfbodycontent']//a"

    for anchor in response.xpath(anchor_path):
        hrefs = anchor.xpath("./@href").extract()
        if not hrefs:
            continue
        href = hrefs[0]
        text = anchor.xpath(".//text()").extract()

        if "download.verizon.net" in href and "firmware" in href:
            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", href)
            loader.add_value("description", text[0])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_product(self, response):
    """Yield one firmware item for a download detail page.

    The title is the first ``<p>`` text of the first second-column cell
    and the download link is taken from the fourth such cell, joined onto
    ``self.download_path``.  Only the product part of the parsed title is
    stored; the version and date are parsed but not added to the item,
    as in the original code.

    Changes from the previous revision: an upper-case hardware marker
    ("...V1") is now actually stripped from the product (the old code
    split on lower-case "v" in that branch), the debug ``print`` is gone,
    and malformed titles no longer raise IndexError inside the title
    parser.

    Raises:
        IndexError: if the page has fewer than four second-column cells,
            or the title / link is missing.
    """
    cells = list(response.xpath("//table//tr//td[2]"))

    title = cells[0].xpath("./p/text()").extract()[0]
    url = urllib.parse.urljoin(self.download_path,
                               cells[3].xpath("./a/@href").extract()[0])

    def split_title(text):
        """Return ``(product, version, date)`` guessed from a title."""
        version = date = None
        words = text.split(' ')
        product = words[0]

        # Pick the "<version>_<date>" token, if the title has one.
        tagged = None
        if len(words) == 2:
            if '_' in words[0]:
                # e.g. "MR814v1_070807 <upgrade program>"
                tagged = words[0]
            elif words[1][:1] not in ('v', 'V'):
                tagged = words[1]
            # else e.g. "MWR300T V1(081210)<standard edition>":
            # nothing to split off.
        elif len(words) == 3:
            tagged = words[1]

        if tagged is not None:
            pieces = tagged.split('_')
            version = pieces[0]
            if len(pieces) > 1:
                # Two-word titles carry trailing text after the 6 digits.
                date = pieces[1] if len(words) == 3 else pieces[1][:6]

        # "MR814v1": the real version is embedded in the product token.
        if version and version[0] not in ('v', 'V') and 'v' in product:
            product, version = product.split('v')[:2]

        # Reduce the product to the bare model name.
        product = product.split('_')[0]
        if 'v' in product:
            product = product.split('v')[0]
        elif 'V' in product:
            product = product.split('V')[0]
        return product, version, date

    product, _version, _date = split_title(title)

    item = FirmwareLoader(item=FirmwareImage())
    item.add_value("url", url)
    item.add_value("product", product)
    # NOTE: "date" and "version" were disabled in the original code and
    # are still not emitted.
    item.add_value("vendor", self.vendor)
    item.add_value("description", title)
    yield item.load_item()
def parse_product(self, response):
    """Yield firmware items, pairing each with matching GPL source.

    Download blocks are grouped by type id on the page:
    firmware = 20, gpl source = 30, bios = 3.  For every firmware block
    the first download link is used.  If a version was found, the GPL
    source blocks are scanned and the last one whose text contains that
    version supplies the ``sdk`` link.
    """
    firmware_blocks = response.xpath(
        "//div[@id='div_type_20']/div[@id='download-os-answer-table']")
    for block in firmware_blocks:
        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%Y/%m/%d"])
        version = FirmwareLoader.find_version_period(
            block.xpath("./p//text()").extract())

        # grab first download link (e.g. DLM instead of global or p2p)
        href = block.xpath("./table//tr[3]//a/@href").extract()[0]

        # attempt to find matching source code entry
        gpl = None
        if version:
            source_blocks = response.xpath(
                "//div[@id='div_type_30']/div[@id='download-os-answer-table']")
            for source in source_blocks:
                source_text = "".join(source.xpath("./p//text()").extract())
                if version in source_text:
                    gpl = source.xpath(
                        "./table//tr[3]//a/@href").extract()[0]

        date_cells = block.xpath("./table//tr[2]/td[1]//text()").extract()
        title_cells = block.xpath("./table//tr[1]//td[1]//text()").extract()

        loader.add_value("version", version)
        loader.add_value("date", loader.find_date(date_cells))
        loader.add_value("description", " ".join(title_cells))
        loader.add_value("url", href)
        loader.add_value("sdk", gpl)
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_product(self, response):
    """Yield firmware items scraped from a ``var commonInfo`` script.

    Nothing is yielded unless the response text starts with
    "var commonInfo".  The ``id``, ``title``, ``romVersions``, ``romUrl``
    and ``updateDate`` fields are collected with regular expressions and
    combined positionally.

    Changes from the previous revision: the product now comes from the
    ``id`` field (it was mistakenly read with the version pattern), the
    parsed version is stored on the item, the Python-2-only ``print``
    statements and ``xrange`` are gone, and field lists of unequal length
    no longer raise IndexError (extra trailing values are ignored).
    """
    import re

    js = response.text
    if not js.startswith("var commonInfo"):
        return

    products = re.findall(u'id:"(?P<product>.*?)"', js)
    descriptions = re.findall(u'title:"(?P<description>.*?)"', js)
    versions = re.findall(u'romVersions:"(?P<version>.*?)"', js)
    urls = re.findall(u'romUrl:"(?P<url>.*?)"', js)
    dates = re.findall(u'updateDate:"(?P<date>.*?)"', js)

    # NOTE(review): positional pairing assumes every record carries all
    # five fields in the script -- confirm against a live response.
    records = zip(products, descriptions, versions, urls, dates)
    for product, description, version, url, date in records:
        item = FirmwareLoader(item=FirmwareImage(), response=response)
        item.add_value("url", url)
        item.add_value("product", product)
        item.add_value("description", description)
        item.add_value("version", version)
        item.add_value("date", date)
        item.add_value("vendor", self.name)
        yield item.load_item()
def parse_model_files(self, response):
    """Yield a firmware item for each firmware entry of a model's JSON.

    The JSON body is expected to hold ``downloads -> firmware`` as a
    mapping of firmware records; when either key is missing an info
    message is logged and nothing is yielded.  The "global" download link
    is used (other keys seen: 'europe', 'usa') and is joined onto
    "https://" when it has no http/https scheme.

    The raw ``response.body`` is parsed (rather than decoded text) because
    of unicode problems with the 'remarks' fields on Python 2.

    ``dict.values()`` replaces the Python-2-only ``iteritems()``; the keys
    were never used.
    """
    meta = response.meta
    try:
        model_files = json.loads(response.body)['downloads']['firmware']
    except KeyError:
        logging.info("No downloadable firmware for %s", meta)
        return

    for fw_info in model_files.values():
        href = fw_info['links']['global']
        if not href.startswith((u"https://", u"http://")):
            href = urlparse.urljoin(u"https://", href)

        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt="%Y-%m-%d")
        item.add_value('product', meta['name'])
        item.add_value('vendor', self.name)
        item.add_value('description', fw_info['releasenote'])
        item.add_value('date', fw_info['published_at'])
        item.add_value('version', fw_info['version'])
        item.add_value('url', href)
        yield item.load_item()
def parse(self, response):
    """Browse the download menu and yield firmware items.

    Links without "_a=download" in their URL are navigation: they are
    followed with this same callback and the link text, split on spaces,
    is passed on as ``meta["product"]``.  Download links whose text
    mentions "firmware" or "f/w" become items; version and date are
    searched for in the text of the enclosing cell.
    """
    cells = response.xpath(
        "//div[@class='menu2']//table//table//table[2]//td[1]//td[2]")
    for cell in cells:
        desc = cell.xpath(".//text()").extract()
        for anchor in cell.xpath(".//a"):
            href = anchor.xpath("./@href").extract()[0]
            text = anchor.xpath(".//text()").extract()[0]

            if "_a=download" not in href:
                yield Request(
                    url=urlparse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    meta={"product": text.strip().split(' ')},
                    callback=self.parse)
                continue

            label = text.lower()
            if "firmware" in label or "f/w" in label:
                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response,
                                        date_fmt=["%m/%d/%Y", "%m/%d/%y"])
                loader.add_value("version",
                                 FirmwareLoader.find_version(desc))
                loader.add_value("date", loader.find_date(desc))
                loader.add_value("description", text)
                loader.add_value("url", href)
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
def parse_product(self, response):
    """Emit a firmware item per ``.npk`` or ``.lzb`` link on the page.

    The whole page's text is used to find both the release date and the
    version.  The product is the file name of the link truncated at its
    last "-".
    """
    wanted = (".npk", ".lzb")
    firmware_links = (href
                      for href in response.xpath("//a/@href").extract()
                      if href.endswith(wanted))
    for href in firmware_links:
        text = response.xpath("//text()").extract()
        basename = href.split("/")[-1]
        cut = basename.rfind("-")

        loader = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%Y-%b-%d"])
        loader.add_value("date", loader.find_date(text))
        loader.add_value("url", href)
        loader.add_value("product", basename[0:cut])
        loader.add_value("vendor", self.name)
        loader.add_value(
            "version", FirmwareLoader.find_version_period(text))
        yield loader.load_item()
def parse_download(self, response):
    """Yield firmware items from the records of category bucket 7.

    The product is the page header text with " Support & Drivers"
    removed.  Date and version are both searched for in the same
    ``dateVersion`` strings, and "file-download" in the link is replaced
    by "file-redirect".
    """
    records = response.xpath(
        "//li[@class='categoryBucket categoryBucketId-7']"
        "//li[@class='record ']")
    for record in records:
        header = response.xpath(
            "//div[@class='prodNavHeaderBody']//text()").extract()[0]
        product = header.replace(" Support & Drivers", "")

        # The same strings feed both the date and the version finder.
        date = record.xpath(
            ".//ul[@class='dateVersion']//strong/text()").extract()
        version = record.xpath(
            ".//ul[@class='dateVersion']//strong/text()").extract()

        raw_href = record.xpath(".//a/@href").extract()[0]
        href = raw_href.replace("file-download", "file-redirect")
        text = record.xpath(".//a//text()").extract()[0]

        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%b %d, %Y"])
        loader.add_value("url", href)
        loader.add_value("product", product)
        loader.add_value("date", loader.find_date(date))
        loader.add_value("description", text)
        loader.add_value("version", loader.find_version_period(version))
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_product(self, response):
    """Yield a single firmware item for the page's download link.

    The URL is ``self.firmware_url`` followed by the ``href`` of the
    ``downLoadHref`` anchor.  Date, description and product are the values
    carried in ``response.meta``.

    The date is now stored with ``add_value``; it used to go through
    ``add_xpath``, which evaluates its argument as an XPath expression
    against the response instead of storing it.

    Raises:
        IndexError: if the page has no ``downLoadHref`` anchor.
    """
    link = response.xpath('//a[@id="downLoadHref"]/@href').extract()[0]
    url = self.firmware_url + link

    item = FirmwareLoader(item=FirmwareImage(), response=response)
    item.add_value("date", response.meta['date'])
    item.add_value("description", response.meta['description'])
    item.add_value("url", url)
    item.add_value("product", response.meta["product"])
    item.add_value("vendor", self.name)
    yield item.load_item()
def parse_download(self, response):
    """Turn the firmware entries of a JSON file list into items.

    Entries with a ``subFileType`` other than "firmware" are ignored.
    ``releaseDate`` is divided by 1000 and formatted, via
    ``datetime.fromtimestamp``, with the first format in the loader's
    ``date_fmt`` context.
    """
    listing = json.loads(response.body_as_unicode())
    firmware_entries = (entry for entry in listing
                        if entry["subFileType"] == "firmware")
    for entry in firmware_entries:
        loader = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%d/%m/%y"])
        loader.add_value("version", entry["fileVersion"])

        seconds = int(entry["releaseDate"]) / 1000
        stamp = datetime.datetime.fromtimestamp(seconds)
        loader.add_value(
            "date", stamp.strftime(loader.context.get("date_fmt")[0]))

        loader.add_value("description", entry["fileName"])
        loader.add_value("url", entry["downloadUrl"])
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.vendor)
        yield loader.load_item()
def parse(self, response):
    """Walk a directory listing and yield items for ``.gz`` images.

    Links containing ".." are ignored, links ending in "/" are followed
    with this same callback, and links ending in ".gz" (without ".iso" in
    the URL) become firmware items whose product and version are derived
    from the link text.
    """
    for link in response.xpath("//a"):
        # Raises IndexError for an anchor with no text or no href.
        text = link.xpath(".//text()").extract()[0]
        href = link.xpath(".//@href").extract()[0]

        if ".." in href:
            continue
        elif href.endswith('/'):
            yield Request(
                url=urlparse.urljoin(response.url, href),
                headers={"Referer": response.url},
                callback=self.parse)
        elif href.endswith(".gz") and ".iso" not in href:
            # strip off multiple file extensions
            basename = os.path.splitext(text)[0]
            while ".img" in basename or ".iso" in basename:
                basename = os.path.splitext(basename)[0]

            # From here on ``basename`` is the list of "-" separated parts.
            basename = basename.split("-")

            version = FirmwareLoader.find_version_period(basename)
            # attempt to parse filename and generate product/version
            # strings
            # ``remove`` collects the parts that must not end up in the
            # product name.
            remove = [version] if version else []
            for i in range(0, len(basename)):
                if "BETA" in basename[i]:
                    # The part after "BETA" is appended as well.
                    # NOTE(review): fails if no version was found (None +=
                    # str) or if "BETA" is the last part -- confirm whether
                    # such names occur.
                    version += "-%s%s" % (basename[i], basename[i + 1])
                    remove.append(basename[i])
                    remove.append(basename[i + 1])
                elif "RC" in basename[i]:
                    version += "-%s" % (basename[i])
                    remove.append(basename[i])
                elif "RELEASE" in basename[i]:
                    remove.append(basename[i])

            basename = [x for x in basename if x not in remove]

            item = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
            item.add_value("version", version)
            item.add_value("url", href)
            # The date is searched for in the text that follows the link.
            item.add_value("date", item.find_date(
                link.xpath("following::text()").extract()))
            item.add_value("product", "-".join(basename))
            item.add_value("vendor", self.name)
            yield item.load_item()
def parse(self, response):
    """Traverse the download menu; emit firmware download links.

    A link whose URL lacks "_a=download" is followed recursively, with
    its text (stripped and split on spaces) as ``meta["product"]``.  A
    download link is emitted as an item only if its text contains
    "firmware" or "f/w" (case-insensitive); version and date come from
    the text of the surrounding cell.
    """
    cell_path = "//div[@class='menu2']//table//table//table[2]//td[1]//td[2]"
    for entry in response.xpath(cell_path):
        cell_text = entry.xpath(".//text()").extract()

        for link in entry.xpath(".//a"):
            target = link.xpath("./@href").extract()[0]
            label = link.xpath(".//text()").extract()[0]
            is_download = "_a=download" in target

            if not is_download:
                yield Request(url=urllib.parse.urljoin(response.url, target),
                              headers={"Referer": response.url},
                              meta={"product": label.strip().split(' ')},
                              callback=self.parse)
            elif "firmware" in label.lower() or "f/w" in label.lower():
                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response,
                                        date_fmt=["%m/%d/%Y", "%m/%d/%y"])
                loader.add_value("version",
                                 FirmwareLoader.find_version(cell_text))
                loader.add_value("date", loader.find_date(cell_text))
                loader.add_value("description", label)
                loader.add_value("url", target)
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
def parse_product(self, response):
    """Yield firmware items from the upgrade-software tab of a product.

    Only tab sections whose heading contains the upgrade-software label
    are used.  The first text node of each link is the description; the
    version is searched for in the link text.
    """
    for section in response.xpath("//ul[@id='tab_conbox']/li"):
        heading = "".join(section.xpath("./h3//text()").extract())
        if u"升级软件" not in heading:
            continue

        for anchor in section.xpath(".//dd/a"):
            text = anchor.xpath(".//text()").extract()
            href = anchor.xpath("./@href").extract()[0]
            desc = text[0]

            # A single text node is split into words and reversed, since
            # the hardware version can come before the firmware version,
            # e.g. "FH330<upgrade software>(V1.0) V1.0.0.24_CN".
            if len(text) == 1:
                text = text[0].split()[::-1]

            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("version",
                             FirmwareLoader.find_version_period(text))
            loader.add_value("url", href)
            loader.add_value("product", response.meta["product"])
            loader.add_value("description", desc)
            loader.add_value("vendor", self.vendor)
            yield loader.load_item()
def parse_product(self, response):
    """Yield firmware items from a product's download table.

    Rows are visited bottom-up.  A row with a version ``<select>`` holds
    several releases, read by position from its type, date, version and
    download cells.  A plain row is either a firmware release or a MIB
    file; the most recently seen MIB link is attached to every firmware
    item yielded after it.  Nothing is yielded for an empty body.
    """
    if not response.body:
        return

    def make_item(version, date, url, mib_url):
        # Shared item construction for both row layouts.
        loader = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%m-%d-%Y"])
        loader.add_value("version", version)
        loader.add_value("date", date)
        loader.add_value("url", url)
        loader.add_value("product", response.meta["product"])
        loader.add_value("mib", mib_url)
        loader.add_value("vendor", self.name)
        return loader.load_item()

    mib = None
    for row in reversed(response.xpath("//table/tbody/tr")):
        if row.xpath("./td[contains(@class, 'versionTd')]/select"):
            options = row.xpath(
                "./td[contains(@class, 'versionTd')]/select/option")
            for idx in range(len(options)):
                kind = row.xpath(
                    "./td[contains(@class, 'typeTd')]/span/text()"
                ).extract()[idx].lower()
                if "firmware" not in kind:
                    continue

                date = row.xpath(
                    "./td[contains(@class, 'dateTd')]/span/text()"
                ).extract()[idx]
                ver = row.xpath(
                    "./td[contains(@class, 'versionTd')]/select/option/text()"
                ).extract()[idx]
                href = row.xpath(
                    "./td[contains(@class, 'downloadTd')]/div/a[1]/@data-filelink"
                ).extract()[idx]
                yield make_item(ver, date, href, mib)
        else:
            kind = row.xpath(
                "./td[contains(@class, 'typeTd')]//text()"
            ).extract()[1].lower()

            if "firmware" in kind:
                date = row.xpath(
                    "./td[contains(@class, 'dateTd')]//text()").extract()
                ver = row.xpath(
                    "./td[contains(@class, 'versionTd')]//text()").extract()
                href = row.xpath(
                    "./td[contains(@class, 'downloadTd')]//a/@data-filelink"
                ).extract()[0]
                yield make_item(ver, date, href, mib)
            elif "mib" in kind:
                mib = row.xpath(
                    "./td[contains(@class, 'downloadTd')]//a/@href"
                ).extract()[0]
def parse_json(self, response):
    """Yield firmware items from a JSON download listing.

    Items and their files are walked in reverse order.  A file counts as
    firmware when its ``filetypename`` is "firmware" (any case) or its
    ``isFirmF`` flag is "1"; otherwise, a file with "MIB" in its name
    becomes the MIB link attached to the firmware items that follow.
    """
    mib = None
    listing = json.loads(response.body_as_unicode())

    for group in reversed(listing["item"]):
        for entry in reversed(group["file"]):
            is_firmware = (entry["filetypename"].lower() == "firmware"
                           or entry["isFirmF"] == "1")
            if is_firmware:
                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response,
                                        date_fmt=["%m/%d/%y"])
                loader.add_value(
                    "version",
                    FirmwareLoader.find_version_period([entry["name"]]))
                loader.add_value("date", entry["date"])
                loader.add_value("description", entry["name"])
                loader.add_value("url", entry["url"])
                loader.add_value("build", response.meta["revision"])
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                loader.add_value("mib", mib)
                yield loader.load_item()
            elif "MIB" in entry["name"]:
                mib = entry["url"]
def parse(self, response):
    """Yield a firmware item for every ``.img`` link on the page.

    The spider's name serves as both product and vendor.  The version is
    the last "-" separated part of the file name, cut at ".img".
    """
    image_links = (href for href in response.xpath("//a/@href").extract()
                   if href.endswith(".img"))
    for href in image_links:
        last_part = href.split("/")[-1].split("-")[-1]
        version = last_part[0:last_part.rfind(".img")]

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        loader.add_value("url", href)
        loader.add_value("product", self.name)
        loader.add_value("vendor", self.name)
        loader.add_value("version", version)
        yield loader.load_item()
def parse(self, response):
    """Walk an archive listing and yield firmware items.

    The software family (DSMUC, DSM, VSM, VSF or SRM) is the first of
    those names found in the response URL; rows on pages matching none
    are skipped.  Directory links are followed with the version and build
    in the request meta: the version is inherited from the parent page if
    set, and a directory name without a version number is taken as the
    build.  File links, except .txt/.md5/.torrent, become items.
    """
    families = ("DSMUC", "DSM", "VSM", "VSF", "SRM")
    ignored_suffixes = (".txt", ".md5", ".torrent")

    for row in response.xpath("//table/tr[position() > 3]"):
        if not row.xpath("./td[2]/a"):
            continue

        text = row.xpath("./td[2]/a//text()").extract()[0]
        href = row.xpath("./td[2]/a/@href").extract()[0]
        date = row.xpath("./td[3]//text()").extract()[0]

        # Order matters: "DSMUC" must be tested before its prefix "DSM".
        software = next(
            (name for name in families if name in response.url), None)
        if software is None:
            continue  # should not happen :-)

        if href.endswith('/'):
            found = FirmwareLoader.find_version_period([text])
            version = response.meta.get("version", found)
            # Directory names end in "/"; without a version number the
            # name is the build.
            build = None if found else text[0:-1]

            yield Request(
                url=urllib.parse.urljoin(response.url, href),
                headers={"Referer": response.url},
                meta={"build": build, "version": version},
                callback=self.parse)
        elif not href.lower().endswith(ignored_suffixes):
            parts = os.path.splitext(text)[0].split("_")
            if software in parts:
                if response.meta["build"] in parts:
                    parts.remove(response.meta["build"])
                parts.remove(software)
                product = " ".join(parts)
            else:
                # usually "synology_x86_ds13_1504"
                product = parts[-2]

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%d-%b-%Y"])
            loader.add_value("build", response.meta["build"])
            loader.add_value("version", response.meta["version"])
            if software == "DSM":
                loader.add_value(
                    "mib",
                    "https://global.download.synology.com/download/Document/Software/"
                    "DeveloperGuide/Firmware/DSM/All/enu/Synology_MIB_File.zip")
            loader.add_value("url", href)
            loader.add_value("date", date)
            loader.add_value("product", product)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_product(self, response):
    """Expand hardware builds, then yield firmware items per build.

    If the page has a build drop-down and no build was chosen yet, one
    request per drop-down entry is scheduled back to this method with the
    entry text as ``meta["build"]``.  Otherwise each firmware table
    becomes an item; the first GPL code link on the page, if any, is
    attached as ``sdk``.
    """
    needs_build_choice = (response.xpath("//dl[@id='dlDropDownBox']")
                          and "build" not in response.meta)
    if needs_build_choice:
        for option in response.xpath("//dl[@id='dlDropDownBox']//li/a"):
            href = option.xpath("./@href").extract()[0]
            text = option.xpath(".//text()").extract()[0]
            yield Request(
                url=urlparse.urljoin(response.url, href),
                meta={"product": response.meta["product"], "build": text},
                headers={"Referer": response.url},
                callback=self.parse_product,
            )
        return

    gpl_links = response.xpath(
        "//div[@id='content_gpl_code']//a/@href").extract()
    sdk = gpl_links[0] if gpl_links else None

    for table in response.xpath("//div[@id='content_firmware']//table"):
        href = table.xpath("./tbody/tr[1]/th[1]//a/@href").extract()[0]
        text = table.xpath("./tbody/tr[1]/th[1]//a//text()").extract()[0]
        date = table.xpath("./tbody/tr[1]/td[1]//text()").extract()

        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%d/%m/%y"])
        loader.add_value("url", href)
        loader.add_value("date", loader.find_date(date))
        loader.add_value("description", text)
        loader.add_value("product", response.meta["product"])
        loader.add_value("build", response.meta.get("build"))
        loader.add_value("vendor", self.vendor)
        loader.add_value("sdk", sdk)
        yield loader.load_item()
def parse_product(self, response):
    """Yield a firmware item for each table row with a download link.

    The header row is skipped.  Version and build are searched for in the
    second column, the date in the fourth, and the first link of the
    fifth column is the URL.  The description is the third text node of
    the first column, stripped.
    """
    rows = response.xpath(
        "//div[@class='main_data_block']//table/tr[position() > 1]")
    for row in rows:
        name_texts = row.xpath("./td[1]//text()").extract()
        edition = row.xpath("./td[2]//text()").extract()
        date_texts = row.xpath("./td[4]//text()").extract()
        links = row.xpath("./td[5]//a/@href").extract()
        if not links:
            continue

        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%Y/%m/%d"])
        loader.add_value("version",
                         FirmwareLoader.find_version_period(edition))
        loader.add_value("build", FirmwareLoader.find_build(edition))
        loader.add_value("url", links[0])
        loader.add_value("date", loader.find_date(date_texts))
        loader.add_value("description", name_texts[2].strip())
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse(self, response):
    """Fan out over router models, or harvest firmware links.

    A page with the ``router`` selector produces one follow-up request per
    option value (skipping empty values and "allrouters").  Any other page
    is searched for links, in the search-result cell when it exists and
    in the body content block otherwise; links to download.verizon.net
    whose URL contains "firmware" are yielded as items.
    """
    selector_present = response.xpath("//select[@id='router']")
    if selector_present:
        for value in response.xpath(
                "//select[@id='router']/option/@value").extract():
            if not value or value == "allrouters":
                continue
            yield Request(
                url=urllib.parse.urljoin(
                    response.url, "?router=%s" % (value)),
                headers={"Referer": response.url},
                callback=self.parse)
        return

    # The two result layouts only differ in the enclosing element.
    if response.xpath("//td[@id='search_main_content']"):
        links = response.xpath("//td[@id='search_main_content']//a")
    else:
        links = response.xpath("//div[@id='ghfbodycontent']//a")

    for link in links:
        if not link.xpath("./@href"):
            continue
        href = link.xpath("./@href").extract()[0]
        text = link.xpath(".//text()").extract()

        is_firmware = "download.verizon.net" in href and "firmware" in href
        if is_firmware:
            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", href)
            loader.add_value("description", text[0])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_product(self, response):
    """Yield firmware items from the first table of the downloads tab.

    Nothing is yielded unless the page has a firmware heading
    (``<h3 class="firm">Firmware</h3>``).  Per table row, the link in
    column 2 is the URL, column 5 the version and column 7 the
    description; the product is the part of the URL before its first "-".

    The Python-2-only debug ``print`` statement was removed, as was the
    extraction of the column-4 date, which was never stored on the item.

    Raises:
        IndexError: if a row lacks the link, version or description.
    """
    if not response.xpath('//h3[@class="firm"]').extract():
        return

    for tr in response.xpath('//*[@id="tab-downloads"]/table[1]/tbody/tr'):
        url = tr.xpath("./td[2]/a/@href").extract()[0]
        version = tr.xpath("./td[5]/text()").extract()[0]
        description = tr.xpath("./td[7]/text()").extract()[0]
        product = url.split('-')[0]

        item = FirmwareLoader(item=FirmwareImage(), response=response)
        item.add_value("version", version)
        item.add_value("description", description)
        item.add_value("url", url)
        item.add_value("product", product)
        item.add_value("vendor", self.name)
        yield item.load_item()
def parse_download(self, response):
    """Yield firmware items from the download tables of a product page.

    A table is used when its text mentions "firmware" in any case.  The
    version is searched for in the first ``maindescription`` entry and
    the date in the second.  The URL is the second quote-delimited piece
    of the first link's ``onclick`` handler, with the download
    confirmation parameters appended.
    """
    confirm_suffix = "&button=Continue+with+Download&Continue=yes"

    for table in response.xpath("//div[@class='downloadtable']"):
        all_text = table.xpath(".//text()").extract()
        if "firmware" not in " ".join(all_text).lower():
            continue

        title = table.xpath(
            ".//li[@class='maindescription' and position() = 1]//text()"
        ).extract()
        date = table.xpath(
            ".//li[@class='maindescription' and position() = 2]//text()"
        ).extract()
        onclick = table.xpath(
            ".//li[@class='maindescription']//a/@onclick").extract()[0]
        href = onclick.split('\'')[1] + confirm_suffix

        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%m/%d/%Y"])
        loader.add_value("url", href)
        loader.add_value("product", response.meta["product"])
        loader.add_value("date", loader.find_date(date))
        loader.add_value("version", FirmwareLoader.find_version(title))
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_product(self, response):
    """Emit firmware items from the rows of a download table.

    The table is read from the last row to the first.  Rows that contain
    a version drop-down describe several releases, matched up by position
    across the type, date, version and download cells.  Other rows
    describe a single file: firmware rows are emitted, while MIB rows
    update the MIB link that later firmware items carry.  An empty
    response body yields nothing.
    """
    mib = None
    if not response.body:
        return

    type_cell = "./td[contains(@class, 'typeTd')]"
    date_cell = "./td[contains(@class, 'dateTd')]"
    version_cell = "./td[contains(@class, 'versionTd')]"
    download_cell = "./td[contains(@class, 'downloadTd')]"

    for entry in reversed(response.xpath("//table/tbody/tr")):
        multi_release = entry.xpath(version_cell + "/select")

        if multi_release:
            count = len(entry.xpath(version_cell + "/select/option"))
            for pos in range(0, count):
                desc = entry.xpath(
                    type_cell + "/span/text()").extract()[pos].lower()
                if "firmware" in desc:
                    date = entry.xpath(
                        date_cell + "/span/text()").extract()[pos]
                    ver = entry.xpath(
                        version_cell + "/select/option/text()"
                    ).extract()[pos]
                    href = entry.xpath(
                        download_cell + "/div/a[1]/@data-filelink"
                    ).extract()[pos]

                    loader = FirmwareLoader(item=FirmwareImage(),
                                            response=response,
                                            date_fmt=["%m-%d-%Y"])
                    loader.add_value("version", ver)
                    loader.add_value("date", date)
                    loader.add_value("url", href)
                    loader.add_value("product", response.meta["product"])
                    loader.add_value("mib", mib)
                    loader.add_value("vendor", self.name)
                    yield loader.load_item()
            continue

        desc = entry.xpath(type_cell + "//text()").extract()[1].lower()
        if "firmware" in desc:
            date = entry.xpath(date_cell + "//text()").extract()
            ver = entry.xpath(version_cell + "//text()").extract()
            href = entry.xpath(
                download_cell + "//a/@data-filelink").extract()[0]

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%m-%d-%Y"])
            loader.add_value("version", ver)
            loader.add_value("date", date)
            loader.add_value("url", href)
            loader.add_value("product", response.meta["product"])
            loader.add_value("mib", mib)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
        elif "mib" in desc:
            mib = entry.xpath(download_cell + "//a/@href").extract()[0]
def parse_url(self, response):
    """Walk a directory-style listing, following folders and emitting firmware.

    For every anchor on the page:

    * links containing ``..`` and the bare root link are ignored;
    * links ending in ``/`` are followed with this same callback, extending
      the product name with the folder name (the link text minus its last
      character), unless the text contains ``package/``;
    * links ending in a known firmware suffix become ``FirmwareImage`` items.

    Reads ``response.meta["version"]`` and, when present,
    ``response.meta["product"]``.
    """
    firmware_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

    for link in response.xpath("//a"):
        text = link.xpath("text()").extract_first()
        href = link.xpath("@href").extract_first()

        # extract_first() returns None for an anchor without an href; such
        # an anchor cannot be followed and previously raised TypeError below.
        if href is None:
            continue
        if text is None and href == "/":
            # <a href="/"><em>(root)</em></a>
            continue
        if ".." in href:
            continue

        if href.endswith('/'):
            # A folder link without a direct text node has no name to build
            # the product from; skip it instead of failing on None.
            if text is None or "package/" in text:
                continue

            folder = text[0:-1]
            if "product" in response.meta:
                product = "%s-%s" % (response.meta["product"], folder)
            else:
                product = folder

            yield Request(url=urllib.parse.urljoin(response.url, href),
                          headers={"Referer": response.url},
                          meta={
                              "version": response.meta["version"],
                              "product": product
                          },
                          callback=self.parse_url)
        elif href.endswith(firmware_suffixes):
            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%d-%b-%Y"])
            item.add_value("version", response.meta["version"])
            item.add_value("url", href)
            # The date is searched for in the text that follows the link.
            item.add_value(
                "date",
                item.find_date(link.xpath("following::text()").extract()))
            item.add_value("product", response.meta["product"])
            item.add_value("vendor", self.name)
            yield item.load_item()
def parse(self, response):
    """Crawl a tabular download index and yield requests or firmware items.

    Table rows after the third are inspected; rows without a link in the
    second cell are skipped. Handling depends on whether the page URL
    contains ``DSM`` or ``VSFirmware``:

    * folder links (ending in ``/``) are followed with ``build`` and
      ``version`` carried in the request meta;
    * file links other than ``.txt``/``.md5``/``.torrent`` become items,
      with the product derived from the underscore-separated file name.

    File rows read ``response.meta["build"]`` and ``response.meta["version"]``.
    """
    ignored_suffixes = (".txt", ".md5", ".torrent")
    mib_url = "http://dedl.synology.com/download/Document/MIBGuide/Synology_MIB_File.zip"

    for row in response.xpath("//table/tr[position() > 3]"):
        if not row.xpath("./td[2]/a"):
            continue

        text = row.xpath("./td[2]/a//text()").extract()[0]
        href = row.xpath("./td[2]/a/@href").extract()[0]
        date = row.xpath("./td[3]//text()").extract()[0]
        is_folder = href.endswith('/')
        is_payload = not href.lower().endswith(ignored_suffixes)

        if "DSM" in response.url:
            if is_folder:
                found = FirmwareLoader.find_version_period([text])
                version = response.meta.get("version", found)
                # A folder name that is not a version is treated as a build.
                build = None if found else text[0: -1]

                yield Request(
                    url=urlparse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    meta={"build": build, "version": version},
                    callback=self.parse)
            elif is_payload:
                parts = os.path.splitext(text)[0].split("_")
                if "DSM" in parts:
                    # Drop the build and the DSM marker; the rest names the
                    # product.
                    if response.meta["build"] in parts:
                        parts.remove(response.meta["build"])
                    parts.remove("DSM")
                    product = " ".join(parts)
                else:
                    product = parts[-2]

                loader = FirmwareLoader(
                    item=FirmwareImage(), response=response,
                    date_fmt=["%d-%b-%Y"])
                loader.add_value("build", response.meta["build"])
                loader.add_value("version", response.meta["version"])
                loader.add_value("mib", mib_url)
                loader.add_value("url", href)
                loader.add_value("date", date)
                loader.add_value("product", product)
                loader.add_value("vendor", self.name)
                yield loader.load_item()

        elif "VSFirmware" in response.url:
            if is_folder:
                # Folder names look like "<version>-<build>/".
                version, build = text[0: -1].split("-")

                yield Request(
                    url=urlparse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    meta={"build": build, "version": version},
                    callback=self.parse)
            elif is_payload:
                parts = os.path.splitext(text)[0].split("_")

                loader = FirmwareLoader(
                    item=FirmwareImage(), response=response,
                    date_fmt=["%d-%b-%Y"])
                loader.add_value("build", response.meta["build"])
                loader.add_value("version", response.meta["version"])
                loader.add_value("url", href)
                loader.add_value("date", date)
                loader.add_value("product", parts[0])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
def parse_json(self, response):
    """Handle the three kinds of JSON reply used by the download API.

    * A reply with a ``products`` list triggers one follow-up request per
      product slug.
    * When the request meta carries ``url`` (set by the SDK follow-up
      below), the reply supplies ``download_url`` as the item's ``sdk``.
    * When the meta only carries ``product``, each firmware entry in
      ``downloads`` either becomes an item directly or, if it has an
      ``sdk__id``, triggers the SDK follow-up request.
    """
    payload = json.loads(response.body_as_unicode())
    meta = response.meta

    def ajax_headers():
        # A fresh dict per request; these mark the call as an XHR.
        return {
            "Referer": response.url,
            "X-Requested-With": "XMLHttpRequest"
        }

    if "products" in payload:
        for product in payload["products"]:
            slug = product["slug"]
            yield Request(url=urllib.parse.urljoin(
                response.url, "?product=%s" % (slug)),
                          headers=ajax_headers(),
                          meta={"product": slug},
                          callback=self.parse_json)

    if "url" in meta:
        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%Y-%m-%d"])
        loader.add_value("url", meta["url"])
        loader.add_value("product", meta["product"])
        loader.add_value("date", meta["date"])
        loader.add_value("description", meta["description"])
        loader.add_value("build", meta["build"])
        loader.add_value("version", meta["version"])
        loader.add_value("sdk", payload["download_url"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
        return

    if "product" not in meta:
        return

    for entry in payload["downloads"]:
        if entry["category__slug"] != "firmware":
            continue

        if entry["sdk__id"]:
            # Fetch the SDK link first; the firmware details ride along in
            # the request meta and are emitted by the "url" branch above.
            yield Request(url=urllib.parse.urljoin(
                response.url, "?gpl=%s&eula=True" % (entry["sdk__id"])),
                          headers=ajax_headers(),
                          meta={
                              "product": meta["product"],
                              "date": entry["date_published"],
                              "build": entry["build"],
                              "url": entry["file_path"],
                              "version": entry["version"],
                              "description": entry["name"]
                          },
                          callback=self.parse_json)
            continue

        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%Y-%m-%d"])
        loader.add_value("url", entry["file_path"])
        loader.add_value("product", meta["product"])
        loader.add_value("date", entry["date_published"])
        loader.add_value("description", entry["name"])
        loader.add_value("build", entry["build"])
        loader.add_value("version", entry["version"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_download(self, response):
    """Yield firmware items for the download links inside ``div#auto``.

    A link qualifies when its href contains "downloads" or "firmware" and
    does not end in ``.html``. The first text node of the link becomes the
    item description.

    Reads ``response.meta["version"]`` and ``response.meta["product"]``.
    """
    for anchor in response.xpath("//div[@id='auto']//a"):
        href = anchor.xpath("./@href").extract()[0]
        label = anchor.xpath(".//text()").extract()[0]

        if href.endswith(".html"):
            continue
        if "downloads" not in href and "firmware" not in href:
            continue

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        loader.add_value("version", response.meta["version"])
        loader.add_value("url", href)
        loader.add_value("description", label)
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_json(self, response):
    """Turn a JSON list of download records into firmware items.

    The response text is a JSON list of objects with a ``showName`` and a
    ``fileName``. ``showName`` is matched against several known layouts to
    split it into product, version, date and build; when none matches, the
    item is still emitted with only url, description and vendor.

    Six-digit dates are prefixed with "20" so they fit the ``%Y%m%d``
    format given to the loader.
    """
    # (pattern, group holding the date, groups joined with ' ' as the build).
    # Tried in order; the first match wins.
    layouts = (
        (r'^(.+) (V[\d\.]+)([^\d]+)(\d+)_([\d\.]+)$', 4, (5,)),
        # TL-NVR5104 V1.0_171205.标准版
        (r'^(.+)[_ ]([vV][\d\.]+)([^\d]*)_([\d]+)([^\d]+)$', 4, (5,)),
        # TL-IPC545K(P) V3.0_180227(1.0.14)标准版
        # The parentheses around the build number are literal and must be
        # escaped; unescaped they were nested groups and the pattern could
        # never match the sample above.
        (r'^(.+)[_ ](V[\d\.]+)_(\d+)\(([\d\.]+)\)([^\d]+)$', 3, (4, 5)),
    )

    resp = json.loads(response.text)
    self.logger.debug(resp)

    for product in resp:
        name = product['showName'].strip()
        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%Y%m%d"])

        # Model, Version, Date, Build
        self.logger.debug("Parsing '%s'" % name)
        for pattern, date_group, build_groups in layouts:
            match = re.search(pattern, name)
            if match is None:
                continue

            self.logger.debug(match.groups())
            item.add_value("product", match.group(1))
            item.add_value("version", match.group(2))

            date = match.group(date_group)
            if len(date) == 6:
                date = "20" + date
            item.add_value("date", date)
            item.add_value(
                "build", ' '.join(match.group(g) for g in build_groups))
            break
        else:
            self.logger.debug("No match for %s" % name)

        url = ('http://service.tp-link.com.cn/download/' +
               quote(product['fileName']))
        # Logged rather than printed, like every other diagnostic here.
        self.logger.debug(url)
        item.add_value("url", url)
        item.add_value("description", name)
        item.add_value("vendor", self.vendor)
        yield item.load_item()
def parse(self, response):
    """Crawl a file listing and yield firmware items for ``.gz`` images.

    Folder links are followed with this same callback; links containing
    ``..`` are ignored. For a ``.gz`` link that is not an ISO, the link text
    is stripped of its extensions and split on ``-``; the version is looked
    up among the pieces, BETA/RC markers are appended to it, and whatever
    is left forms the product name.
    """
    for link in response.xpath("//a"):
        text = link.xpath(".//text()").extract()[0]
        href = link.xpath(".//@href").extract()[0]

        if ".." in href:
            continue

        if href.endswith('/'):
            yield Request(
                url=urljoin(response.url, href),
                headers={"Referer": response.url},
                callback=self.parse)
            continue

        if not href.endswith(".gz") or ".iso" in href:
            continue

        # strip off multiple file extensions
        stem = os.path.splitext(text)[0]
        while ".img" in stem or ".iso" in stem:
            shorter, ext = os.path.splitext(stem)
            if not ext:
                # splitext() found nothing to remove (e.g. a leading-dot
                # name); stop instead of looping forever.
                break
            stem = shorter

        parts = stem.split("-")
        version = FirmwareLoader.find_version_period(parts)

        # attempt to parse filename and generate product/version strings
        remove = [version] if version else []
        tags = []
        for i, part in enumerate(parts):
            if "BETA" in part:
                # The piece after BETA belongs to the tag; it may be absent
                # when BETA is the last piece.
                follower = parts[i + 1] if i + 1 < len(parts) else ""
                tags.append("%s%s" % (part, follower))
                remove.append(part)
                if follower:
                    remove.append(follower)
            elif "RC" in part:
                tags.append(part)
                remove.append(part)
            elif "RELEASE" in part:
                remove.append(part)

        if tags:
            # Same "<version>-<tag>" result as before when a version was
            # found; without one the tags stand alone instead of raising
            # TypeError on None.
            version = "-".join(([version] if version else []) + tags)

        product_parts = [x for x in parts if x not in remove]

        item = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
        item.add_value("version", version)
        item.add_value("url", href)
        item.add_value("date", item.find_date(
            link.xpath("following::text()").extract()))
        item.add_value("product", "-".join(product_parts))
        item.add_value("vendor", self.name)
        yield item.load_item()
def parse_product(self, response):
    """Yield firmware items from the upgrade-software tab of a product page.

    Only ``ul#tab_conbox`` sections whose heading contains "升级软件" are
    used. Every link under a ``dd`` in such a section becomes one item,
    described by the link's first text node.

    Reads ``response.meta["product"]``.
    """
    for section in response.xpath("//ul[@id='tab_conbox']/li"):
        heading = "".join(section.xpath("./h3//text()").extract())
        if u"升级软件" not in heading:
            continue

        for anchor in section.xpath(".//dd/a"):
            pieces = anchor.xpath(".//text()").extract()
            href = anchor.xpath("./@href").extract()[0]
            desc = pieces[0]

            # A single text node is split into words first. The pieces are
            # then searched back to front because the hardware version can
            # come before the firmware version,
            # e.g. "FH330升级软件(V1.0) V1.0.0.24_CN"
            if len(pieces) == 1:
                pieces = pieces[0].split()
            candidates = list(reversed(pieces))

            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("version",
                             FirmwareLoader.find_version_period(candidates))
            loader.add_value("url", href)
            loader.add_value("product", response.meta["product"])
            loader.add_value("description", desc)
            loader.add_value("vendor", self.vendor)
            yield loader.load_item()
def parse_kb(self, response):
    """Yield firmware items for the ``cache-www`` links of a KB article.

    The article's rich-text div is cut into regions at every "=-" and each
    region is parsed as its own HTML document. For each link whose href
    contains ``cache-www``, the version is taken from the text following
    the link, falling back to the whole region; the date is searched in the
    whole region.

    Reads ``response.meta["product"]``. Yields nothing when the page has no
    rich-text div.
    """
    bodies = response.xpath("//div[@class='sfdc_richtext']").extract()
    if not bodies:
        # Previously an IndexError; a page without the article body simply
        # has no downloads.
        return

    # initial html tokenization to find regions segmented by e.g. "======"
    # or "------"
    for region in bodies[0].split("=-"):
        resp = HtmlResponse(url=response.url,
                            body=region.strip(),
                            encoding=response.encoding)

        for link in resp.xpath("//a"):
            hrefs = link.xpath("@href").extract()
            if not hrefs:
                # Anchors without an href must not abort the whole article.
                continue

            href = hrefs[0]
            if "cache-www" not in href:
                continue

            text = resp.xpath("//text()").extract()
            text_next = link.xpath("following::text()").extract()

            item = FirmwareLoader(
                item=FirmwareImage(),
                response=response,
                date_fmt=["%b %d, %Y", "%B %d, %Y", "%m/%d/%Y"])

            version = FirmwareLoader.find_version_period(text_next)
            if not version:
                version = FirmwareLoader.find_version_period(text)

            item.add_value("version", version)
            item.add_value("date", item.find_date(text))
            item.add_value("url", href)
            item.add_value("product", response.meta["product"])
            item.add_value("vendor", self.name)
            yield item.load_item()
def parse(self, response):
    """Handle both the licence-agreement page and the download list page.

    * On a page with the ``UCagreement`` form, every double-quote-delimited
      piece of the ``productAndDoc`` div that mentions
      ``downloads.polycom.com`` is emitted as an item, using the details
      carried in ``response.meta``.
    * On a page with a ``ContentChannel`` div, each list entry is skipped
      (documentation, licences, desktop software), followed (HTML pages) or
      emitted directly as an item.
    """
    skip_words = ["end user license agreement", "eula", "release notes",
                  "mac os", "windows", "guide", "(pdf)", "sample"]

    if response.xpath("//form[@name='UCagreement']"):
        markup = response.xpath("//div[@id='productAndDoc']").extract()[0]
        for href in markup.split('"'):
            if "downloads.polycom.com" not in href:
                continue

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%B %d, %Y"])
            loader.add_value("version", response.meta["version"])
            loader.add_value("url", href.encode("utf-8"))
            loader.add_value("date", response.meta["date"])
            loader.add_value("description", response.meta["description"])
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()

    elif response.xpath("//div[@id='ContentChannel']"):
        for entry in response.xpath("//div[@id='ContentChannel']//li"):
            if not entry.xpath("./a"):
                continue

            text = entry.xpath("./a//text()").extract()[0]
            href = entry.xpath("./a/@href").extract()[0].strip()
            date = entry.xpath("./span//text()").extract()
            path = urlparse.urlparse(href).path
            lowered = text.lower()

            if any(word in lowered for word in skip_words) or \
                    href.endswith(".pdf"):
                continue

            if path.endswith((".htm", ".html")) or "(html)" in lowered:
                # Intermediate page: carry what is known so far along.
                yield Request(
                    url=urlparse.urljoin(
                        response.url, PolycomSpider.fix_url(href)),
                    meta={"product": response.meta.get("product", text),
                          "date": date,
                          "version": FirmwareLoader.find_version_period(
                              [text]),
                          "description": text},
                    headers={"Referer": response.url},
                    callback=self.parse)
            elif path:
                loader = FirmwareLoader(
                    item=FirmwareImage(), response=response,
                    date_fmt=["%B %d, %Y"])
                loader.add_value(
                    "version", FirmwareLoader.find_version_period([text]))
                loader.add_value("url", href.encode("utf-8"))
                loader.add_value("date", loader.find_date(date))
                loader.add_value("description", text)
                # No product is recorded for direct downloads.
                loader.add_value("vendor", self.name)
                yield loader.load_item()
def parse_product(self, response):
    """Yield a firmware item for each row of the first downloads table.

    Nothing is yielded unless the page has an ``<h3 class="firm">`` heading.
    The product name is the part of the download URL before its first "-".
    """
    #<h3 class="firm">Firmware</h3>
    if not response.xpath('//h3[@class="firm"]').extract():
        return

    for tr in response.xpath('//*[@id="tab-downloads"]/table[1]/tbody/tr'):
        # Function-call form: a bare print statement is a syntax error on
        # Python 3 and this prints the same text on Python 2.
        print(tr.extract())

        url = tr.xpath("./td[2]/a/@href").extract()[0]
        # NOTE(review): the date is extracted but never stored on the item
        # and the loader has no date_fmt; confirm whether it should be added.
        date = tr.xpath("./td[4]/text()").extract()[0]
        version = tr.xpath("./td[5]/text()").extract()[0]
        description = tr.xpath("./td[7]/text()").extract()[0]
        product = url.split('-')[0]

        item = FirmwareLoader(item=FirmwareImage(), response=response)
        item.add_value("version", version)
        item.add_value("description", description)
        item.add_value("url", url)
        item.add_value("product", product)
        item.add_value("vendor", self.name)
        yield item.load_item()
def parse_product(self, response):
    """Yield the firmware item advertised on a product page, if any.

    Pages without a ``#Firmware`` tab link yield nothing. Otherwise the
    first row of the firmware table supplies the URL and the second row the
    date; the product name shown on the page is used as the description.

    Reads ``response.meta["product"]`` and ``response.meta["version"]``.
    """
    #<a href="#Firmware"><span>Firmware</span></a>
    if not response.xpath("//a[@href=\"#Firmware\"]").extract():
        # Stop here. The previous ``yield None`` did not end the generator,
        # so the lookups below still ran and failed on the missing table.
        return

    description = response.xpath(
        "//div[@class=\"product-name\"]//strong/text()").extract()[0]
    url = response.xpath(
        "//*[@id=\"content_Firmware\"]/table/tbody/tr[1]/th/a/@href"
    ).extract()[0]
    date = response.xpath(
        "//*[@id=\"content_Firmware\"]/table/tbody/tr[2]/td[1]/span[2]/text()"
    ).extract()[0]

    item = FirmwareLoader(item=FirmwareImage(),
                          response=response,
                          date_fmt=["%d/%m/%y"])
    item.add_value("url", url)
    # NOTE(review): a single string is passed to find_date() here; confirm
    # it accepts that as well as a list of strings.
    item.add_value("date", item.find_date(date))
    item.add_value("description", description)
    item.add_value("product", response.meta["product"])
    item.add_value("version", response.meta["version"])
    item.add_value("vendor", self.vendor)
    yield item.load_item()
def parse_kb(self, response):
    """Yield firmware items for the ``cache-www`` links of a KB article.

    The rich-text div of the article is split at every "=-" into regions,
    each parsed as a separate HTML document. A link whose href contains
    ``cache-www`` becomes an item: its version comes from the text after
    the link (or, failing that, from the whole region) and its date from
    the whole region.

    Reads ``response.meta["product"]``. Yields nothing when the rich-text
    div is missing.
    """
    rich_text = response.xpath("//div[@class='sfdc_richtext']").extract()
    if not rich_text:
        # No article body: nothing to scrape (this used to raise IndexError).
        return

    # initial html tokenization to find regions segmented by e.g. "======"
    # or "------"
    regions = [chunk.strip() for chunk in rich_text[0].split("=-")]

    for region in regions:
        resp = HtmlResponse(url=response.url,
                            body=region,
                            encoding=response.encoding)

        for link in resp.xpath("//a"):
            hrefs = link.xpath("@href").extract()
            # Skip anchors that have no href rather than failing on them.
            if not hrefs or "cache-www" not in hrefs[0]:
                continue
            href = hrefs[0]

            text = resp.xpath("//text()").extract()
            text_next = link.xpath("following::text()").extract()

            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%b %d, %Y", "%B %d, %Y", "%m/%d/%Y"])

            version = FirmwareLoader.find_version_period(text_next)
            if not version:
                version = FirmwareLoader.find_version_period(text)

            item.add_value("version", version)
            item.add_value("date", item.find_date(text))
            item.add_value("url", href)
            item.add_value("product", response.meta["product"])
            item.add_value("vendor", self.name)
            yield item.load_item()
def parse_product(self, response):
    """Yield a firmware item for the first ``.bin`` upload path on the page.

    The page text is searched for a ``/cn/Uploads/files/....bin`` path,
    which is turned into an absolute URL. Product, date, version and
    description come from ``response.meta``. Pages without such a path
    yield nothing.
    """
    import re

    # Function-call form so the line is valid on Python 3 as well.
    print(response.text)

    #/cn/Uploads/files/20161024/K1_V22.4.2.15.bin
    # Raw string: "\." in a normal literal is an invalid escape sequence.
    paths = re.findall(r"(/cn/Uploads/files/.*?\.bin)", response.text)
    if not paths:
        # No firmware link on this page; previously an IndexError.
        return

    url = "http://www.phicomm.com/{}".format(paths[0])

    item = FirmwareLoader(item=FirmwareImage())
    item.add_value("url", url)
    item.add_value("product", response.meta['product'])
    item.add_value("date", response.meta['date'])
    item.add_value("version", response.meta['version'])
    item.add_value("vendor", self.vendor)
    item.add_value("description", response.meta['description'])
    yield item.load_item()
def parse(self, response):
    """Crawl a tabular file index, following folders and emitting firmware.

    Rows without a link in the second cell and links containing ``..`` are
    skipped. The first folder level names the product and the next one the
    build (with the word "build" removed); both travel in the request meta.
    Files with a known firmware suffix become items.

    File rows read ``response.meta["build"]`` and
    ``response.meta["product"]``.
    """
    firmware_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

    for row in response.xpath("//table//tr"):
        if not row.xpath("./td[2]/a"):
            continue

        text = row.xpath("./td[2]/a/text()").extract()[0]
        href = row.xpath("./td[2]//@href").extract()[0]

        if ".." in href:
            continue

        if href.endswith('/'):
            build = response.meta.get("build")
            product = response.meta.get("product")

            # Fill in whichever level of the hierarchy is still unknown.
            if not product:
                product = text
            elif not build:
                build = text.replace("build", "")

            yield Request(url=urlparse.urljoin(response.url, href),
                          headers={"Referer": response.url},
                          meta={
                              "build": build,
                              "product": product
                          },
                          callback=self.parse)
        elif href.endswith(firmware_suffixes):
            name_parts = os.path.splitext(text)[0].split("-")

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%Y-%m-%d"])
            loader.add_value("build", response.meta["build"])
            loader.add_value("url", href)
            loader.add_value(
                "version", FirmwareLoader.find_version_period(name_parts))
            loader.add_value(
                "date",
                loader.find_date(row.xpath("./td[3]/text()").extract()))
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_product_sw_fw(self, response):
    """Yield firmware items from the "Firmware" section of a downloads page.

    Each ``docs-table__section`` is indexed by the text of its column-name
    elements; the section registered under "Firmware" is then walked row by
    row, with ``self.extract_fw_info`` supplying version, href, date and
    description. Rows without an href are skipped.

    Reads ``response.meta['product']``. Yields nothing (and logs at debug
    level) when no "Firmware" section exists.
    """
    product = response.meta['product']

    #inspect_response(response, self)
    col_selector_map = {}

    # Find the "Firmware" section. NOTE: whitespace in the class is intentional
    for section in response.css('div.docs-table__section '):
        for col in section.css('div.docs-table__column-name'):
            col_text = col.xpath('.//text()').extract_first()
            if col_text is None:
                # Column header without any text node; nothing to index.
                continue
            col_text = col_text.strip()
            if len(col_text) > 1:
                col_selector_map[col_text] = section

    try:
        fw_sect = col_selector_map[u'Firmware']
    except KeyError:
        logging.debug("Did not find a 'Firmware' section in the downloads for %s", product)
        return

    # Iterate Firmware rows
    for fw_row in fw_sect.css('div.docs-table__row'):
        fw_version, fw_href, fw_date, fw_desc = self.extract_fw_info(fw_row, response)
        if fw_href is None:
            continue

        item = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%m/%d/%y"])
        item.add_value('product', product)
        item.add_value('vendor', self.name)
        item.add_value('url', fw_href)
        item.add_value('description', fw_desc)
        item.add_value('date', fw_date)
        # The version was extracted but previously dropped.
        item.add_value('version', fw_version)
        yield item.load_item()