def parse_kb(self, response):
    """Yield one firmware item per firmware link in a support article.

    The download paragraphs are split on ``<br><br>`` and every fragment is
    re-parsed as its own document, because the markup does not otherwise
    separate firmware versions.  Paragraphs and fragments are walked in
    reverse so that a MIB link is seen before the firmware links it is
    attached to.

    Args:
        response: article page; ``response.meta["product"]`` must be set.

    Yields:
        Loaded ``FirmwareImage`` items.
    """
    mib = None
    paragraphs = response.xpath(
        "//div[@id='support-article-downloads']/div/p")

    for entry in reversed(paragraphs):
        for segment in reversed(entry.extract().split("<br><br>")):
            resp = HtmlResponse(url=response.url, body=segment,
                                encoding=response.encoding)
            # The fragment text is identical for every link inside it, so
            # it is extracted once rather than once per link.
            text = resp.xpath("//text()").extract()

            for href in resp.xpath("//a/@href").extract():
                if "MIBs" in href:
                    mib = href
                elif "firmware" in href:
                    item = FirmwareLoader(item=FirmwareImage(),
                                          response=resp,
                                          date_fmt=["%m/%d/%Y"])
                    item.add_value("date", item.find_date(text))
                    # Use the link that matched, not every anchor of the
                    # fragment (which may also hold MIB or other links).
                    item.add_value("url", href)
                    item.add_value("mib", mib)
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.name)
                    item.add_value(
                        "version", FirmwareLoader.find_version_period(text))
                    yield item.load_item()
def parse_url(self, response):
    """Walk a directory listing: recurse into folders, emit firmware files.

    Folder links (ending in ``/``) are followed with the folder name
    appended to the product name carried in ``response.meta``; folders whose
    link text contains ``package/`` and links containing ``..`` are ignored.
    Links ending in a known image suffix become firmware items.

    Args:
        response: listing page; ``response.meta["version"]`` must be set.

    Yields:
        ``Request`` objects for sub-folders and loaded ``FirmwareImage``
        items for image files.
    """
    image_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

    for anchor in response.xpath("//a"):
        label = anchor.xpath("text()").extract()[0]
        target = anchor.xpath("@href").extract()[0]

        # Never climb back up the tree.
        if ".." in target:
            continue

        if target.endswith('/'):
            if "package/" in label:
                continue
            folder = label[:-1]  # drop the trailing slash of the link text
            if "product" in response.meta:
                product = "%s-%s" % (response.meta["product"], folder)
            else:
                product = folder
            yield Request(url=urljoin(response.url, target),
                          headers={"Referer": response.url},
                          meta={"version": response.meta["version"],
                                "product": product},
                          callback=self.parse_url)
            continue

        if target.endswith(image_suffixes):
            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%d-%b-%Y"])
            loader.add_value("version", response.meta["version"])
            loader.add_value("url", target)
            # The listing prints the modification date after the link.
            trailing_text = anchor.xpath("following::text()").extract()
            loader.add_value("date", loader.find_date(trailing_text))
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_product(self, response):
    """Yield a firmware item for every download link in the main box.

    The link title is split on spaces: the last word is taken as the
    version and the first word carries the product name, e.g.::

        FH456V1.0 Firmware V10.1.1.1_EN
        E101(V2.0) Firmware V1.10.0.1_EN
        G3(V2.0) Firmware V2.0.0.1_EN
        O3 Firmware V1.0.0.3_EN
        i6 Firmware V1.0.0.9(3857)_EN

    Args:
        response: product page.

    Yields:
        Loaded ``FirmwareImage`` items.
    """
    import re

    # The character class deliberately excludes "v"/"V" so the product
    # name stops before a hardware revision such as "V1.0" or "(V2.0)".
    # \uff08 is the full-width opening parenthesis.
    model_re = re.compile(
        u'^(?P<product>([a-uw-zA-UW-Z0-9])+)[\\(\uff08]?(V\\d\\.0)?')

    for a in response.xpath("//div[@id='mainbox']//dd/a"):
        url = a.xpath("./@href").extract()[0]
        title = a.xpath("./text()").extract()[0]

        items = title.split(' ')
        version = items[-1]

        # Selector text is already decoded, so it is matched directly.  The
        # old code decoded it again and, on failure, fell back to the
        # undefined name ``item`` instead of ``items``.
        match = model_re.search(items[0])
        product = match.group('product') if match else items[0]

        item = FirmwareLoader(item=FirmwareImage(), response=response)
        item.add_value("version", version)
        item.add_value("url", url)
        item.add_value("product", product)
        item.add_value("vendor", self.vendor)
        yield item.load_item()
def parse_model_files(self, response):
    """Yield a firmware item for every firmware entry of a model's JSON.

    The response body is JSON shaped like
    ``{"downloads": {"firmware": {<key>: {...}, ...}}}``; each entry supplies
    ``links``, ``releasenote``, ``published_at`` and ``version``.

    Args:
        response: JSON response; ``response.meta["name"]`` is the product.

    Yields:
        Loaded ``FirmwareImage`` items.  Nothing is yielded (and an info
        message is logged) when the model has no firmware section.
    """
    # ``urllib.urljoin`` does not exist; the function lives in urllib.parse.
    from urllib.parse import urljoin

    meta = response.meta
    # The raw body is parsed (rather than decoded text) because the
    # 'remarks' fields caused unicode trouble.
    try:
        model_files = json.loads(response.body)['downloads']['firmware']
    except KeyError:
        logging.info("No downloadable firmware for %s", meta)
        return

    for fw_info in model_files.values():
        href = fw_info['links']['global']  # options: {'global', 'europe', 'usa'}
        if not href.startswith(("https://", "http://")):
            # NOTE(review): presumably these are scheme-relative links
            # ("//host/path") - confirm against live data.
            href = urljoin("https://", href)

        # date_fmt is a list, as in every other FirmwareLoader call here.
        # NOTE(review): assumes the loader iterates over its formats -
        # confirm against the loader.
        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%Y-%m-%d"])
        item.add_value('product', meta['name'])
        item.add_value('vendor', self.name)
        item.add_value('description', fw_info['releasenote'])
        item.add_value('date', fw_info['published_at'])
        item.add_value('version', fw_info['version'])
        item.add_value('url', href)
        yield item.load_item()
def parse_product(self, response):
    """Yield the firmware item advertised on a product page.

    Pages without a ``#Firmware`` tab link produce nothing.

    Args:
        response: product page; ``response.meta`` must carry ``product``
            and ``version``.

    Yields:
        At most one loaded ``FirmwareImage`` item.
    """
    # <a href="#Firmware"><span>Firmware</span></a>
    if not response.xpath("//a[@href=\"#Firmware\"]").extract():
        # ``yield None`` did not stop the generator: it emitted a bogus
        # item and then ran on into the ``extract()[0]`` lookups below.
        return

    description = response.xpath(
        "//div[@class=\"product-name\"]//strong/text()").extract()[0]
    url = response.xpath(
        "//*[@id=\"content_Firmware\"]/table/tbody/tr[1]/th/a/@href"
    ).extract()[0]
    date = response.xpath(
        "//*[@id=\"content_Firmware\"]/table/tbody/tr[2]/td[1]/span[2]/text()"
    ).extract()[0]

    item = FirmwareLoader(item=FirmwareImage(),
                          response=response,
                          date_fmt=["%d/%m/%y"])
    item.add_value("url", url)
    item.add_value("date", item.find_date(date))
    item.add_value("description", description)
    item.add_value("product", response.meta["product"])
    item.add_value("version", response.meta["version"])
    item.add_value("vendor", self.vendor)
    yield item.load_item()
def parse_product(self, response):
    """Yield one item per firmware download table on a product page.

    Download types on the page: firmware = 20, gpl source = 30, bios = 3.
    For each firmware table the matching GPL source link, if any, is stored
    in the ``sdk`` field.

    Args:
        response: product page; ``response.meta["product"]`` must be set.

    Yields:
        Loaded ``FirmwareImage`` items.
    """
    firmware_tables = \
        "//div[@id='div_type_20']/div[@id='download-os-answer-table']"
    source_tables = \
        "//div[@id='div_type_30']/div[@id='download-os-answer-table']"
    first_link = "./table//tr[3]//a/@href"

    for entry in response.xpath(firmware_tables):
        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%Y/%m/%d"])
        heading = entry.xpath("./p//text()").extract()
        version = FirmwareLoader.find_version_period(heading)

        # First download link only (e.g. DLM rather than global or p2p).
        href = entry.xpath(first_link).extract()[0]

        # Look for a source-code table mentioning the same version; when
        # several mention it, the last one wins.
        sdk_link = None
        if version:
            for source in response.xpath(source_tables):
                source_heading = "".join(source.xpath("./p//text()").extract())
                if version in source_heading:
                    sdk_link = source.xpath(first_link).extract()[0]

        date_cells = entry.xpath("./table//tr[2]/td[1]//text()").extract()
        summary = entry.xpath("./table//tr[1]//td[1]//text()").extract()

        loader.add_value("version", version)
        loader.add_value("date", loader.find_date(date_cells))
        loader.add_value("description", " ".join(summary))
        loader.add_value("url", href)
        loader.add_value("sdk", sdk_link)
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_product(self, response):
    """Yield the firmware item described by the first download table.

    Row 1 of the table holds the title, row 4 the date and row 5 the link.
    The title is reduced to ASCII and then split: ``<product>_<build>...``
    gives both fields, ``<product> ...`` gives only the product.

    Args:
        response: product download page.

    Yields:
        One loaded ``FirmwareImage`` item.
    """
    table = "//div[@class='download']/table[1]"
    title = response.xpath(table + "//tr[1]/td[2]//text()").extract()[0]
    date = response.xpath(table + "//tr[4]/td[2]//text()").extract()
    href = response.xpath(table + "//tr[5]/td[2]/a/@href").extract()[0]

    # Drop non-ASCII characters but stay in text: comparing a str against
    # the encoded bytes ("_" in text) raises TypeError on Python 3.
    text = title.encode("ascii", errors="ignore").decode("ascii")

    build = None
    product = None
    if "_" in text:
        parts = text.split("_")
        product, build = parts[0], parts[1]
    elif " " in text:
        product = text.split(" ")[0]

    item = FirmwareLoader(item=FirmwareImage(),
                          response=response,
                          date_fmt=["%Y/%m/%d"])
    # url and description are passed as text, not UTF-8 encoded bytes.
    item.add_value("url", href)
    item.add_value("date", item.find_date(date))
    item.add_value("description", title)
    item.add_value("build", build)
    item.add_value("product", product)
    item.add_value("vendor", self.vendor)
    yield item.load_item()
def parse_product(self, response):
    """Yield firmware items scraped from a JavaScript data file.

    Every ``id:"..."`` match is paired, by position, with the matching
    ``romUrl``, ``romVersions``, ``title`` and ``updateDate`` values.

    Args:
        response: response whose text is the JavaScript source.

    Yields:
        Loaded ``FirmwareImage`` items.

    Raises:
        IndexError: if one of the other fields has fewer matches than
            ``id``.
    """
    import re

    js = response.text
    if js.startswith("var commonInfo"):
        print(response.url)
        print(js)

    patterns = (
        ("product", u"id:\"(?P<product>.*?)\""),
        ("description", u"title:\"(?P<description>.*?)\""),
        ("version", u"romVersions:\"(?P<version>.*?)\""),
        ("url", u"romUrl:\"(?P<url>.*?)\""),
        ("date", u"updateDate:\"(?P<date>.*?)\""),
    )
    found = {field: re.findall(pattern, js) for field, pattern in patterns}

    for index, product in enumerate(found["product"]):
        url = found["url"][index]
        version = found["version"][index]
        description = found["description"][index]
        date = found["date"][index]

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        loader.add_value("url", url)
        loader.add_value("version", version)
        loader.add_value("product", product)
        loader.add_value("description", description)
        loader.add_value("date", date)
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_download(self, response):
    """Yield a firmware item for every download table mentioning firmware.

    The download URL is the text between the first pair of single quotes in
    the link's ``onclick`` attribute, with the confirmation query string
    appended.

    Args:
        response: download page; ``response.meta["product"]`` must be set.

    Yields:
        Loaded ``FirmwareImage`` items.
    """
    confirm_suffix = "&button=Continue+with+Download&Continue=yes"

    for table in response.xpath("//div[@class='downloadtable']"):
        everything = " ".join(table.xpath(".//text()").extract())
        if "firmware" not in everything.lower():
            continue

        text = table.xpath(
            ".//li[@class='maindescription' and position() = 1]//text()"
        ).extract()
        date = table.xpath(
            ".//li[@class='maindescription' and position() = 2]//text()"
        ).extract()
        onclick = table.xpath(
            ".//li[@class='maindescription']//a/@onclick"
        ).extract()[0]
        href = onclick.split('\'')[1] + confirm_suffix

        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%m/%d/%Y"])
        loader.add_value("url", href)
        loader.add_value("product", response.meta["product"])
        loader.add_value("date", loader.find_date(date))
        loader.add_value("version", FirmwareLoader.find_version(text))
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_download(self, response):
    """Yield a firmware item for every record in download category 7.

    The product name is the page header with " Support & Drivers" removed.
    Date and version are both searched for in the record's
    ``dateVersion`` text.

    Args:
        response: support/download page of one product.

    Yields:
        Loaded ``FirmwareImage`` items.
    """
    records = response.xpath(
        "//li[@class='categoryBucket categoryBucketId-7']//li[@class='record ']"
    )
    for record in records:
        header = response.xpath(
            "//div[@class='prodNavHeaderBody']//text()").extract()[0]
        product = header.replace(" Support & Drivers", "")

        date_version = record.xpath(
            ".//ul[@class='dateVersion']//strong/text()").extract()

        download = record.xpath(".//a/@href").extract()[0]
        href = download.replace("file-download", "file-redirect")
        text = record.xpath(".//a//text()").extract()[0]

        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%b %d, %Y"])
        loader.add_value("url", href)
        loader.add_value("product", product)
        loader.add_value("date", loader.find_date(date_version))
        loader.add_value("description", text)
        loader.add_value("version", loader.find_version_period(date_version))
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_kb(self, response):
    """Yield a firmware item for every ``cache-www`` link in an article.

    The rich-text body is cut at every literal ``=-`` and each piece is
    re-parsed as its own document, so that version and date are searched
    for only in the region surrounding a link.
    NOTE(review): presumably ``=-`` is meant to catch the "======" /
    "------" separator rows used in these articles - confirm.

    Args:
        response: article page; ``response.meta["product"]`` must be set.

    Yields:
        Loaded ``FirmwareImage`` items.
    """
    body = response.xpath("//div[@class='sfdc_richtext']").extract()[0]
    regions = (piece.strip() if piece else piece
               for piece in body.split("=-"))

    for region in regions:
        resp = HtmlResponse(url=response.url, body=region,
                            encoding=response.encoding)
        for link in resp.xpath("//a"):
            href = link.xpath("@href").extract()[0]
            if "cache-www" not in href:
                continue

            text = resp.xpath("//text()").extract()
            text_next = link.xpath("following::text()").extract()

            loader = FirmwareLoader(
                item=FirmwareImage(),
                response=response,
                date_fmt=["%b %d, %Y", "%B %d, %Y", "%m/%d/%Y"])

            # Prefer a version printed after the link; otherwise search
            # the whole region.
            version = (FirmwareLoader.find_version_period(text_next)
                       or FirmwareLoader.find_version_period(text))

            loader.add_value("version", version)
            loader.add_value("date", loader.find_date(text))
            loader.add_value("url", href)
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_json(self, response):
    """Yield a firmware item for every entry of a JSON product list.

    Each entry's ``showName`` is matched against three known layouts to
    split it into product, version, date and build.  An entry matching
    none of them is still emitted, with only url, description and vendor.

    Args:
        response: JSON response containing a list of objects with
            ``showName`` and ``fileName`` keys.

    Yields:
        Loaded ``FirmwareImage`` items.
    """
    # (pattern, group holding the date, groups joined into the build)
    layouts = (
        # Model, Version, Date, Build
        (r'^(.+) (V[\d\.]+)([^\d]+)(\d+)_([\d\.]+)$', 4, (5,)),
        # TL-NVR5104 V1.0_171205.标准版
        (r'^(.+)[_ ]([vV][\d\.]+)([^\d]*)_([\d]+)([^\d]+)$', 4, (5,)),
        # TL-IPC545K(P) V3.0_180227(1.0.14)标准版
        # The parentheses around the build number are literal; unescaped
        # they were plain groups and the pattern never matched.
        (r'^(.+)[_ ](V[\d\.]+)_(\d+)\(([\d\.]+)\)([^\d]+)$', 3, (4, 5)),
    )

    resp = json.loads(response.text)
    self.logger.debug(resp)

    for product in resp:
        name = product['showName'].strip()
        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%Y%m%d"])
        self.logger.debug("Parsing '%s'" % name)

        for pattern, date_group, build_groups in layouts:
            match = re.search(pattern, name)
            if not match:
                continue
            self.logger.debug(match.groups())
            item.add_value("product", match.group(1))
            item.add_value("version", match.group(2))
            date = match.group(date_group)
            if len(date) == 6:
                # Two-digit year: expand to match the %Y%m%d format.
                date = "20" + date
            item.add_value("date", date)
            item.add_value(
                "build", ' '.join(match.group(g) for g in build_groups))
            break
        else:
            self.logger.debug("No match for %s" % name)

        url = ('http://service.tp-link.com.cn/download/'
               + quote(product['fileName']))
        # Logged instead of printed to stdout.
        self.logger.debug(url)
        item.add_value("url", url)
        item.add_value("description", name)
        item.add_value("vendor", self.vendor)
        yield item.load_item()
def parse_product(self, response):
    """Yield the firmware item linked from the page's thumbnail box.

    Args:
        response: product page; ``response.meta`` must carry ``version``
            and ``product``.

    Yields:
        One loaded ``FirmwareImage`` item.

    Raises:
        IndexError: if the page has no link inside the thumbnail box.
    """
    thumbnail_links = response.xpath(
        "//div[@class='thumbnail']//a/@href").extract()
    download_url = thumbnail_links[0]

    loader = FirmwareLoader(item=FirmwareImage(), response=response)
    for field, value in (("version", response.meta['version']),
                         ("url", download_url),
                         ("product", response.meta['product']),
                         ("vendor", self.vendor)):
        loader.add_value(field, value)
    yield loader.load_item()
def download_item(self, response):
    """Yield the firmware item behind a page's download button.

    The button's href has ``https:`` prepended to it.
    NOTE(review): this presumably means the href is scheme-relative
    ("//host/path") - confirm.

    Args:
        response: download page; ``response.meta`` must carry ``version``,
            ``date`` and ``product``.

    Yields:
        One loaded ``FirmwareImage`` item, or nothing when the page has no
        download button link.
    """
    href = response.xpath(
        "//div[@class='downbtns']/a/@href").extract_first()
    if href is None:
        # Without this guard the item was emitted with the URL "https:None".
        return

    url = "https:" + str(href)
    item = FirmwareLoader(item=FirmwareImage(),
                          response=response,
                          date_fmt=["%Y-%m-%d"])
    item.add_value("url", url)
    item.add_value("version", response.meta["version"])
    item.add_value("date", response.meta["date"])
    item.add_value("product", response.meta["product"])
    item.add_value("vendor", self.name)
    yield item.load_item()
def parse(self, response):
    """Crawl product overview pages and emit firmware from product pages.

    * With ``product`` in ``response.meta`` the page is a product page:
      every table row after the header whose name and link do not look
      like documentation or desktop software becomes a firmware item.
    * Otherwise, if the page has a product listing, every listed HTML page
      is requested with the link text as the product name.

    Args:
        response: overview or product page.

    Yields:
        ``Request`` objects (overview pages) or loaded ``FirmwareImage``
        items (product pages).
    """
    if "product" in response.meta:
        # Words marking files that are not firmware.
        skip_words = (
            "end user license agreement", "eula", "release notes",
            "mac os", "windows", "guide", "(pdf)", "sample", "client",
            "manager", "software", "virtual", "control_panel",
            "activexbypass",
        )
        skip_suffixes = ("htm", "html", "pdf", "ova", ".plcm.vc")

        # [1:] skips the table's first row.
        for entry in response.xpath("//div[@class='tab-content']//tr")[1:]:
            version = entry.xpath("./td[1]//a//text()").extract_first()
            url = entry.xpath("./td[2]//a/@href").extract_first()
            if version is None or url is None:
                continue

            url_lc = url.lower()
            version_lc = version.lower()
            if any(w in url_lc or w in version_lc for w in skip_words) \
                    or url.endswith(skip_suffixes):
                continue

            # A trailing comma used to turn this into a one-element tuple.
            url = urllib.parse.urljoin(response.url,
                                       PolycomSpider.fix_url(url))

            item = FirmwareLoader(item=FirmwareImage(), response=response)
            item.add_value("version", version)
            item.add_value("url", url)
            item.add_value("product", response.meta["product"])
            item.add_value("vendor", self.name)
            yield item.load_item()

    # all entries on the product overview pages
    elif response.xpath("//div[@class='product-listing']"):
        for entry in response.xpath("//div[@class='product-listing']//li"):
            if not entry.xpath("./a"):
                continue

            text = entry.xpath("./a//text()").extract_first()
            href = entry.xpath("./a/@href").extract_first()
            # A link without text or href cannot name or reach a product.
            if text is None or href is None:
                continue
            href = href.strip()
            text_lc = text.lower()

            if any(x in text_lc for x in
                   ("advisories", "support", "notices", "features")) \
                    or href.endswith(".pdf"):
                continue

            path = urllib.parse.urlparse(href).path
            if path.endswith((".htm", ".html")) or "(html)" in text_lc:
                yield Request(
                    url=urllib.parse.urljoin(
                        response.url, PolycomSpider.fix_url(href)),
                    meta={"product": text},
                    headers={"Referer": response.url},
                    callback=self.parse)
def parse_product(self, response):
    """Yield the firmware item behind the page's download link.

    The link's href is appended to ``self.firmware_url``.

    Args:
        response: product page; ``response.meta`` must carry ``date``,
            ``description`` and ``product``.

    Yields:
        One loaded ``FirmwareImage`` item.
    """
    url = self.firmware_url + response.xpath(
        '//a[@id="downLoadHref"]/@href').extract()[0]

    item = FirmwareLoader(item=FirmwareImage(), response=response)
    # The date comes from the request meta, so it is a value.  It used to
    # be passed to add_xpath, which evaluates its argument as an XPath
    # expression against the page.
    item.add_value("date", response.meta['date'])
    item.add_value("description", response.meta['description'])
    item.add_value("url", url)
    item.add_value("product", response.meta["product"])
    item.add_value("vendor", self.name)
    yield item.load_item()
def parse(self, response):
    """Yield a firmware item for every link in a table's second column.

    The link's ``title`` attribute is used as the description and is also
    handed to ``self.parse_product`` to derive the product name.

    Args:
        response: listing page.

    Yields:
        Loaded ``FirmwareImage`` items.
    """
    for anchor in response.xpath("//table//tr//td[2]//a"):
        title = anchor.xpath('./@title').extract()[0]
        link = anchor.xpath('./@href').extract()[0]

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        loader.add_value("url", link)
        loader.add_value("product", self.parse_product(title))
        loader.add_value("description", title)
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse(self, response):
    """Yield a firmware item for every ``.img`` link on the page.

    The version is the part of the file name after its last ``-``, with the
    ``.img`` extension removed.  The spider name is used as both product
    and vendor.

    Args:
        response: listing page.

    Yields:
        Loaded ``FirmwareImage`` items.
    """
    image_links = (link for link in response.xpath("//a/@href").extract()
                   if link.endswith(".img"))

    for href in image_links:
        filename = href.split("/")[-1]
        last_part = filename.split("-")[-1]
        version = last_part[:last_part.rfind(".img")]

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        loader.add_value("url", href)
        loader.add_value("product", self.name)
        loader.add_value("vendor", self.name)
        loader.add_value("version", version)
        yield loader.load_item()
def parse(self, response):
    """Crawl a link tree, emitting ``.bin`` / ``.upg`` files as firmware.

    Links equal to ``..`` or ``/`` are ignored.  Any other link containing
    a ``/`` is followed with this same callback.

    Args:
        response: listing page.

    Yields:
        Loaded ``FirmwareImage`` items and follow-up ``Request`` objects.
    """
    for href in response.xpath("//a/@href").extract():
        if href in ("..", "/"):
            continue

        if href.endswith((".bin", ".upg")):
            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", href)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
            continue

        if "/" in href:
            yield Request(url=urllib.parse.urljoin(response.url, href),
                          headers={"Referer": response.url},
                          callback=self.parse)
def parse(self, response):
    """Yield firmware items from the two paragraph lists on the page.

    Each list is a flat run of ``<p>`` elements in fixed-size groups.
    Within a group, paragraph 2 lists the product names, paragraph 3 the
    version and the last paragraph holds the download link.  One item is
    emitted per product name.

    Args:
        response: download page.

    Yields:
        Loaded ``FirmwareImage`` items, first list first.
    """
    # (span index, paragraphs per group, product separator)
    # The link sits in the group's last paragraph.
    layouts = ((1, 7, "\r\n"), (2, 5, ","))

    for span, group_size, separator in layouts:
        base = "//div[@id='main_right']/span[%d]" % span
        paragraph_count = len(response.xpath(base + "/p"))

        for start in range(0, paragraph_count, group_size):
            names_xpath = "%s//p[%d]/text()" % (base, start + 2)
            version_xpath = "%s//p[%d]/text()" % (base, start + 3)
            url_xpath = "%s//p[%d]/a/@href" % (base, start + group_size)

            names = response.xpath(names_xpath).extract()[0].split(separator)
            for product in names:
                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response)
                loader.add_xpath("version", version_xpath)
                loader.add_xpath("url", url_xpath)
                loader.add_value("product", product)
                loader.add_value("vendor", self.name)
                yield loader.load_item()
def parse_download(self, response):
    """Yield a firmware item for every download link in the ``auto`` box.

    A link qualifies when its href contains ``downloads`` or ``firmware``
    and does not end in ``.html``.

    Args:
        response: download page; ``response.meta`` must carry ``version``
            and ``product``.

    Yields:
        Loaded ``FirmwareImage`` items.
    """
    for anchor in response.xpath("//div[@id='auto']//a"):
        href = anchor.xpath("./@href").extract()[0]
        text = anchor.xpath(".//text()").extract()[0]

        if href.endswith(".html"):
            continue
        if "downloads" not in href and "firmware" not in href:
            continue

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        loader.add_value("version", response.meta["version"])
        loader.add_value("url", href)
        loader.add_value("description", text)
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_link(self, response):
    """Yield a firmware item for every row of the download items table.

    Args:
        response: download page; ``response.meta["product"]`` must be set.

    Yields:
        Loaded ``FirmwareImage`` items.
    """
    # Some items require captcha authentication and pass a cookie of the
    # form "DownloadAuthorizationToken = <long hexadecimal string>".
    for row in response.xpath("//tbody[@class='etdownloaditems']//tr"):
        version = row.xpath(
            ".//td[@class='column-version']//text()").extract()[0].strip()
        href = row.xpath(".//th/a/@href").extract()[0]
        label = row.xpath(".//th/a//text()").extract()[0]

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        loader.add_value("version", version)
        loader.add_value("url", href)
        loader.add_value("description", label)
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse(self, response):
    """Expand the router selector or emit firmware links from a page.

    * A page with the ``router`` drop-down yields one request per router
      (skipping empty values and ``allrouters``).
    * Any other page is scanned for ``download.verizon.net`` links whose
      href contains ``firmware``: inside the search results cell when it
      exists, otherwise inside the ``ghfbodycontent`` box.

    Args:
        response: selector, search result or content page.

    Yields:
        ``Request`` objects or loaded ``FirmwareImage`` items.
    """
    if response.xpath("//select[@id='router']"):
        routers = response.xpath(
            "//select[@id='router']/option/@value").extract()
        for product in routers:
            if not product or product == "allrouters":
                continue
            yield Request(
                url=urllib.parse.urljoin(
                    response.url, "?router=%s" % (product)),
                headers={"Referer": response.url},
                callback=self.parse)
        return

    if response.xpath("//td[@id='search_main_content']"):
        anchors = "//td[@id='search_main_content']//a"
    else:
        anchors = "//div[@id='ghfbodycontent']//a"

    for link in response.xpath(anchors):
        hrefs = link.xpath("./@href").extract()
        if not hrefs:
            continue
        href = hrefs[0]
        text = link.xpath(".//text()").extract()

        if "download.verizon.net" in href and "firmware" in href:
            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", href)
            loader.add_value("description", text[0])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_download(self, response):
    """Yield a firmware item for every firmware entry of a JSON file list.

    Entries whose ``subFileType`` is not ``"firmware"`` are skipped.
    ``releaseDate`` is a timestamp in milliseconds; it is rendered with the
    loader's first date format.

    Args:
        response: JSON response; ``response.meta["product"]`` must be set.

    Yields:
        Loaded ``FirmwareImage`` items.
    """
    # response.text replaces the deprecated body_as_unicode().
    json_response = json.loads(response.text)

    # ``entry`` rather than ``file``, which shadows a builtin on Python 2.
    for entry in json_response:
        if entry["subFileType"] != "firmware":
            continue

        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%d/%m/%y"])
        # fromtimestamp() converts using the local time zone.
        released = datetime.datetime.fromtimestamp(
            int(entry["releaseDate"]) / 1000)

        item.add_value("version", entry["fileVersion"])
        item.add_value(
            "date", released.strftime(item.context.get("date_fmt")[0]))
        item.add_value("description", entry["fileName"])
        item.add_value("url", entry["downloadUrl"])
        item.add_value("product", response.meta["product"])
        item.add_value("vendor", self.vendor)
        yield item.load_item()
def parse_product(self, response):
    """Yield a firmware item for every firmware row of the second accordion.

    The table's first row is skipped.  A row counts as firmware when the
    text of the first link in its second cell contains "firmware" (any
    case).

    Args:
        response: product page; ``response.meta["product"]`` must be set.

    Yields:
        Loaded ``FirmwareImage`` items.
    """
    rows = response.xpath("//div[@id='accordion-2']//tr[position() > 1]")

    for row in rows:
        text = row.xpath("./td[2]//a[1]/text()").extract()
        if "firmware" not in "".join(text).lower():
            continue

        # The loader is bound to the row, so its XPaths are row-relative.
        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                selector=row,
                                date_fmt=["%Y-%m-%d"])
        loader.add_xpath("date", "td[1]//text()")
        loader.add_value("description", text)
        loader.add_xpath("url", "td[2]//a[1]/@href")
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        loader.add_value("version",
                         FirmwareLoader.find_version_period(text))
        yield loader.load_item()
def parse(self, response):
    """Yield a firmware item for every "Firmware" link in the main container.

    Paragraphs and lists are examined one at a time; the product name is
    searched for in the text of the element that contains the link.

    Args:
        response: download page.

    Yields:
        Loaded ``FirmwareImage`` items.
    """
    blocks = response.xpath(
        "//div[@class='main-container']//p|//div[@class='main-container']//ul"
    )
    for block in blocks:
        text = block.xpath(".//text()").extract()
        firmware_links = (link for link in block.xpath(".//a/@href").extract()
                          if "Firmware" in link)

        for href in firmware_links:
            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", href)
            loader.add_value("product", FirmwareLoader.find_product(text))
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_product(self, response):
    """Yield a firmware item for every ``.npk`` / ``.lzb`` link on the page.

    The product is the file name up to its last ``-``.  Date and version
    are searched for in the text of the whole page.

    Args:
        response: download page.

    Yields:
        Loaded ``FirmwareImage`` items.
    """
    for href in response.xpath("//a/@href").extract():
        if not href.endswith((".npk", ".lzb")):
            continue

        page_text = response.xpath("//text()").extract()
        filename = href.split("/")[-1]
        product = filename[:filename.rfind("-")]

        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%Y-%b-%d"])
        loader.add_value("date", loader.find_date(page_text))
        loader.add_value("url", href)
        loader.add_value("product", product)
        loader.add_value("vendor", self.name)
        loader.add_value("version",
                         FirmwareLoader.find_version_period(page_text))
        yield loader.load_item()
def parse_product(self, response):
    """Yield the firmware item whose ``.bin`` path appears in the page.

    The first path of the form ``/cn/Uploads/files/....bin`` found in the
    page text (e.g. ``/cn/Uploads/files/20161024/K1_V22.4.2.15.bin``) is
    turned into an absolute URL.

    Args:
        response: product page; ``response.meta`` must carry ``product``,
            ``date``, ``version`` and ``description``.

    Yields:
        One loaded ``FirmwareImage`` item, or nothing when the page
        contains no firmware path.
    """
    import re

    # The debugging ``print response.text`` statement (Python 2 only
    # syntax) has been dropped.
    paths = re.findall(r"(/cn/Uploads/files/.*?\.bin)", response.text)
    if not paths:
        # No download on this page; indexing [0] used to raise IndexError.
        return

    url = "http://www.phicomm.com/{}".format(paths[0])

    item = FirmwareLoader(item=FirmwareImage())
    item.add_value("url", url)
    item.add_value("product", response.meta['product'])
    item.add_value("date", response.meta['date'])
    item.add_value("version", response.meta['version'])
    item.add_value("vendor", self.vendor)
    item.add_value("description", response.meta['description'])
    yield item.load_item()
def parse(self, response):
    """Crawl a directory listing and emit compressed images as firmware.

    Folder links are followed with this same callback; links containing
    ``..`` are ignored.  For ``.gz`` files that are not ISO images, the
    link text (minus its extensions) is split on ``-``: the version is
    found in those parts, BETA / RC markers are appended to it, and
    whatever remains forms the product name.

    Args:
        response: listing page.

    Yields:
        ``Request`` objects for folders and loaded ``FirmwareImage`` items.
    """
    for link in response.xpath("//a"):
        text = link.xpath(".//text()").extract()[0]
        href = link.xpath(".//@href").extract()[0]

        if ".." in href:
            continue
        elif href.endswith('/'):
            yield Request(url=urlparse.urljoin(response.url, href),
                          headers={"Referer": response.url},
                          callback=self.parse)
        elif href.endswith(".gz") and ".iso" not in href:
            # Strip off multiple file extensions.
            basename = os.path.splitext(text)[0]
            while ".img" in basename or ".iso" in basename:
                stripped = os.path.splitext(basename)[0]
                if stripped == basename:
                    # Nothing left to strip (e.g. a leading-dot name);
                    # stop instead of looping forever.
                    break
                basename = stripped

            parts = basename.split("-")
            version = FirmwareLoader.find_version_period(parts)

            # Parts that belong to the version, not the product name.
            remove = [version] if version else []
            suffixes = []
            for i, part in enumerate(parts):
                if "BETA" in part:
                    # The beta number is the next part; it is absent when
                    # BETA is the last part.
                    following = parts[i + 1] if i + 1 < len(parts) else ""
                    suffixes.append(part + following)
                    remove.append(part)
                    if following:
                        remove.append(following)
                elif "RC" in part:
                    suffixes.append(part)
                    remove.append(part)
                elif "RELEASE" in part:
                    remove.append(part)

            if suffixes:
                # Also works when no numeric version was found; the old
                # ``version += ...`` failed on None in that case.
                version = "-".join(([version] if version else []) + suffixes)

            product_parts = [x for x in parts if x not in remove]

            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%d-%b-%Y"])
            item.add_value("version", version)
            item.add_value("url", href)
            item.add_value(
                "date",
                item.find_date(link.xpath("following::text()").extract()))
            item.add_value("product", "-".join(product_parts))
            item.add_value("vendor", self.name)
            yield item.load_item()
def parse_product(self, response):
    """Collect the firmware downloads listed on a product page.

    The number of visible download sections on the page is counted in
    ``self.outer_count_map``.  Only the first section is read; entries
    whose title does not contain "Firmware" or "firmware" are skipped.

    Args:
        response: product page.

    Returns:
        A list of ``FirmwareImage`` items, or ``None`` when the page has no
        download section or the section has no entries.
    """
    outers = response.css('#topicsdownload:not(.hidea)')
    outer_count = len(outers)
    self.outer_count_map[outer_count] = \
        self.outer_count_map.get(outer_count, 0) + 1

    if outer_count == 0:
        logging.warning('Cannot find download section on URL: ' +
                        response.request.url)
        return
    if outer_count > 1:
        # Was ``> 0``, which warned about duplicates on every normal page.
        logging.warning('Duplicate download sections present on URL: ' +
                        response.request.url + '. Picking the first.')

    items = outers[0].css('.accordion-item')
    if len(items) == 0:
        logging.warning('No download items found on URL: ' +
                        response.request.url)
        return

    results = []
    for item in items:
        # The title is kept as text: UTF-8 encoded bytes make the substring
        # test and the log concatenation below fail on Python 3.
        name = item.css('.accordion-title h1')[0].xpath(
            "text()").extract()[0]
        link = item.css('.accordion-content a')[0].xpath(
            "@href").extract()[0]

        if not ("Firmware" in name or "firmware" in name):
            logging.warning('Skipping non-firmware download: ' + name)
            continue

        result = FirmwareImage()
        result['product'] = response.css('.model .product-code').xpath(
            "text()").extract()[0].strip()
        result['vendor'] = 'Netgear'
        result['description'] = name
        result['url'] = link
        results.append(result)

    return results