예제 #1
0
    def parse_product(self, response):
        """Yield one firmware item per download link inside ``div#mainbox``.

        Each link's text is split on spaces: the last token is taken as the
        firmware version and the first token as the product.  When the first
        token is a model name followed by a hardware revision (see the
        samples below), only the model name is kept as the product.

        Links that have no ``href`` or no direct text are skipped.
        """
        import re

        # Sample link texts:
        #   FH456V1.0 Firmware V10.1.1.1_EN
        #   E101(V2.0) Firmware V1.10.0.1_EN
        #   G3(V2.0) Firmware V2.0.0.1_EN
        #   O3 Firmware V1.0.0.3_EN
        #   i6 Firmware V1.0.0.9(3857)_EN
        # The product group is the leading run of ASCII letters/digits other
        # than 'v'/'V'; it may be followed by an ASCII or full-width opening
        # parenthesis and a "Vn.0" hardware revision.
        pattern = re.compile(
            u'^(?P<product>([a-uw-zA-UW-Z0-9])+)[\\(\uff08]?(V\\d\\.0)?')

        for a in response.xpath("//div[@id='mainbox']//dd/a"):
            hrefs = a.xpath("./@href").extract()
            texts = a.xpath("./text()").extract()
            if not hrefs or not texts:
                # Nothing to download or nothing to parse a name from.
                continue
            url = hrefs[0]
            title = texts[0]

            items = title.split(' ')
            version = items[-1]

            # The selector already returns text, so the token is matched
            # as-is (decoding it again broke on the full-width parenthesis).
            # Fall back to the raw first token when the pattern does not
            # match, e.g. when the token starts with 'V'.
            match = pattern.search(items[0])
            product = match.group('product') if match else items[0]

            item = FirmwareLoader(
                item=FirmwareImage(), response=response)
            item.add_value("version", version)
            item.add_value("url", url)
            item.add_value("product", product)
            item.add_value("vendor", self.vendor)
            yield item.load_item()
예제 #2
0
    def parse_product_firmware(self, response):
        """Emit a firmware item for every download button in the file table.

        The product name comes from ``response.meta["product"]``.  The date
        stored on each item is the badge text of the first ``<li>`` whose
        class matches ``[hide_empty:update_date]``; when no such entry is
        seen, the last ``[hide_empty:create_date]`` badge is used instead
        (initially an empty string).
        """
        product_name = response.meta["product"]

        # Work out the date: prefer the update date, fall back to create date.
        badge_xpath = './/span[@class="badge"]/text()'
        created = ''
        updated = None
        saw_update = False
        for node in response.xpath('//li'):
            if node.xpath('@class').re(r'(\[hide_empty:create_date\])'):
                created = node.xpath(badge_xpath).extract_first()
                continue
            if node.xpath('@class').re(r'(\[hide_empty:update_date\])'):
                updated = node.xpath(badge_xpath).extract_first()
                saw_update = True
                break
        release_date = updated if saw_update else created

        # One item per download button in the file list table.
        table = response.xpath(
            '//table[@class="wpdm-filelist table table-hover"]')
        buttons = table.xpath('.//a[@class="inddl btn btn-primary btn-sm"]')
        for button in buttons:
            link = button.xpath("@href")

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt="%B %d, %Y")
            loader.add_value("product", product_name)
            loader.add_value("vendor", self.name)
            loader.add_value("date", release_date)
            loader.add_value("url", link.extract_first())

            yield loader.load_item()
예제 #3
0
 def parse_product(self, response):
     """Build one firmware item from the first link inside a thumbnail div.

     Version and product are read from ``response.meta``; the vendor is
     ``self.vendor``.
     """
     thumbnail_links = response.xpath(
         "//div[@class='thumbnail']//a/@href").extract()
     download_url = thumbnail_links[0]

     loader = FirmwareLoader(item=FirmwareImage(), response=response)
     loader.add_value("version", response.meta['version'])
     loader.add_value("url", download_url)
     loader.add_value("product", response.meta['product'])
     loader.add_value("vendor", self.vendor)
     yield loader.load_item()
예제 #4
0
파일: ublox.py 프로젝트: ryantberg/scraper
    def parse(self, response):
        """Yield a firmware item for each link in a table's second column.

        The link's ``title`` attribute is stored as the description and is
        also handed to ``self.parse_product`` to obtain the product value.
        """
        for anchor in response.xpath("//table//tr//td[2]//a"):
            link_title = anchor.xpath('./@title').extract()[0]
            link_href = anchor.xpath('./@href').extract()[0]

            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", link_href)
            loader.add_value("product", self.parse_product(link_title))
            loader.add_value("description", link_title)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
예제 #5
0
 def parse(self, response):
     """Yield a firmware item for every URL listed in ``self.firmware``.

     The product is the part of the URL's last path segment that precedes
     the first underscore.  ``response`` itself is not inspected.
     """
     for firmware_url in self.firmware:
         loader = FirmwareLoader(item=FirmwareImage())
         loader.add_value("url", firmware_url)

         filename = firmware_url.split("/")[-1]
         loader.add_value("product", filename.split("_")[0])
         loader.add_value("vendor", self.name)
         yield loader.load_item()
예제 #6
0
    def parse(self, response):
        """Yield a firmware item for each "Firmware" link in the main container.

        Every ``<p>`` and ``<ul>`` under ``div.main-container`` is examined;
        the text nodes of that element are passed to
        ``FirmwareLoader.find_product`` to obtain the product for each link
        whose ``href`` contains ``"Firmware"``.
        """
        blocks = response.xpath(
            "//div[@class='main-container']//p|//div[@class='main-container']//ul"
        )
        for block in blocks:
            block_text = block.xpath(".//text()").extract()
            firmware_links = [
                link for link in block.xpath(".//a/@href").extract()
                if "Firmware" in link
            ]

            for link in firmware_links:
                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response)
                loader.add_value("url", link)
                loader.add_value("product",
                                 FirmwareLoader.find_product(block_text))
                loader.add_value("vendor", self.name)
                yield loader.load_item()
예제 #7
0
파일: seiki.py 프로젝트: MikimotoH/scraper
    def parse(self, response):
        """Emit firmware items for links whose ``href`` contains "Firmware".

        Only links inside ``<p>`` or ``<ul>`` elements of
        ``div.main-container`` are considered.  The product is whatever
        ``FirmwareLoader.find_product`` returns for the enclosing element's
        text nodes.
        """
        container_xpath = (
            "//div[@class='main-container']//p"
            "|//div[@class='main-container']//ul")
        for section in response.xpath(container_xpath):
            strings = section.xpath(".//text()").extract()

            for target in section.xpath(".//a/@href").extract():
                if "Firmware" not in target:
                    continue

                loader = FirmwareLoader(
                    item=FirmwareImage(), response=response)
                loader.add_value("url", target)
                loader.add_value(
                    "product", FirmwareLoader.find_product(strings))
                loader.add_value("vendor", self.name)
                yield loader.load_item()
예제 #8
0
    def parse(self, response):
        """Follow every link on the page, tagging the request with a version.

        For each ``<a>`` the direct text is passed to
        ``FirmwareLoader.find_version_period`` and the result is forwarded
        in ``meta["version"]`` to ``self.parse_url``.

        Anchors without an ``href`` or without a direct text node are
        skipped; previously a single such anchor raised ``IndexError`` and
        aborted processing of the whole page.
        """
        for link in response.xpath("//a"):
            texts = link.xpath("text()").extract()
            hrefs = link.xpath("@href").extract()
            if not texts or not hrefs:
                # e.g. <a href="/"><em>(root)</em></a> has no direct text.
                continue
            text = texts[0]
            href = hrefs[0]

            yield Request(
                url=urlparse.urljoin(response.url, href),
                headers={"Referer": response.url},
                meta={"version": FirmwareLoader.find_version_period(text)},
                callback=self.parse_url)
예제 #9
0
    def parse(self, response):
        """Request every linked URL, passing along a version parsed from its text.

        The version is ``FirmwareLoader.find_version_period`` applied to the
        anchor's direct text; responses are handled by ``self.parse_url``.

        An anchor that has no direct text node or no ``href`` attribute is
        ignored rather than raising ``IndexError`` (which used to stop the
        generator for the rest of the page).
        """
        for anchor in response.xpath("//a"):
            label = anchor.xpath("text()").extract()
            target = anchor.xpath("@href").extract()
            if not (label and target):
                # Nothing to parse a version from, or nowhere to go.
                continue

            yield Request(
                url=urlparse.urljoin(response.url, target[0]),
                headers={"Referer": response.url},
                meta={"version":
                      FirmwareLoader.find_version_period(label[0])},
                callback=self.parse_url)
예제 #10
0
파일: openwrt.py 프로젝트: lxonz/IoT-vulhub
    def parse(self, response):
        """Follow each link on the page except the text-less root link.

        The anchor's direct text is given to
        ``FirmwareLoader.find_version_period`` and the result travels in
        ``meta["version"]`` to ``self.parse_url``.
        """
        for anchor in response.xpath("//a"):
            label = anchor.xpath("text()").extract_first()
            target = anchor.xpath("@href").extract_first()

            # Skip the root entry: <a href="/"><em>(root)</em></a>
            if target == "/" and label is None:
                continue

            version = FirmwareLoader.find_version_period(label)
            yield Request(
                url=urllib.parse.urljoin(response.url, target),
                headers={"Referer": response.url},
                meta={"version": version},
                callback=self.parse_url)
예제 #11
0
 def parse(self, response):
     """Crawl a link listing, emitting firmware items and following folders.

     ``..`` and ``/`` are ignored; links ending in ``.bin`` or ``.upg``
     become firmware items; any other link containing ``/`` is requested
     and parsed by this same method.
     """
     for target in response.xpath("//a/@href").extract():
         if target in ("..", "/"):
             continue

         if target.endswith((".bin", ".upg")):
             loader = FirmwareLoader(item=FirmwareImage(), response=response)
             loader.add_value("url", target)
             loader.add_value("vendor", self.name)
             yield loader.load_item()
             continue

         if "/" in target:
             yield Request(url=urllib.parse.urljoin(response.url, target),
                           headers={"Referer": response.url},
                           callback=self.parse)
예제 #12
0
    def parse(self, response):
        """Yield firmware items for content links labelled ``<number>---...``.

        The first text node containing ``"---"`` supplies an integer (the
        part before the first ``-``) that is used to look up
        ``self.firmware[idx]``; element 0 of that entry is the product and
        element 1 the version.  Links without such a label are skipped.
        """
        for anchor in response.xpath("//div[@class='content']//a"):
            strings = anchor.xpath(".//text()").extract()
            href = anchor.xpath("./@href").extract()[0]

            marker = next((s for s in strings if "---" in s), None)
            idx = None if marker is None else int(marker.split("-")[0])

            # NOTE(review): a parsed index of 0 is skipped as well -- confirm
            # that 0 is never a valid key of self.firmware.
            if not idx:
                continue

            entry = self.firmware[idx]
            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", href)
            loader.add_value("version", entry[1])
            loader.add_value("product", entry[0])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
예제 #13
0
 def parse(self, response):
     """Walk the product boxes and request every linked ``.html`` page.

     A box without any link sets the current product (its first text node,
     stripped); that product is attached to the requests generated from
     the boxes that follow.  Each request also carries the version found
     by ``FirmwareLoader.find_version`` in the box's text and is handled by
     ``self.parse_download``.
     """
     current_product = None
     boxes = response.xpath(
         "//div[@class='product-content']/div[@class='product-box2']/div")
     for box in boxes:
         strings = box.xpath(".//text()").extract()

         if not box.xpath(".//a"):
             current_product = strings[0].strip()
             continue

         for target in box.xpath(".//a/@href").extract():
             if not target.endswith(".html"):
                 continue
             yield Request(
                 url=urllib.parse.urljoin(response.url, target),
                 meta={"product": current_product,
                       "version": FirmwareLoader.find_version(strings)},
                 headers={"Referer": response.url},
                 callback=self.parse_download)
예제 #14
0
 def parse(self, response):
     """Request the ``.html`` pages linked from each product box.

     Boxes are visited in document order.  A link-less box only updates the
     remembered product name; a box with links produces one request per
     ``.html`` link, carrying that product and the version extracted from
     the box's text, with ``self.parse_download`` as callback.
     """
     section_xpath = ("//div[@class='product-content']"
                      "/div[@class='product-box2']/div")
     product = None
     for section in response.xpath(section_xpath):
         text_nodes = section.xpath(".//text()").extract()
         has_links = bool(section.xpath(".//a"))

         if has_links:
             html_links = [href
                           for href in section.xpath(".//a/@href").extract()
                           if href.endswith(".html")]
             for href in html_links:
                 yield Request(
                     url=urlparse.urljoin(response.url, href),
                     meta={"product": product,
                           "version": FirmwareLoader.find_version(text_nodes)},
                     headers={"Referer": response.url},
                     callback=self.parse_download)
         else:
             product = text_nodes[0].strip()
예제 #15
0
    def parse_product(self, response):
        """Yield firmware items from tables captioned "firmware update".

        For every first-column link with text in such a table, the product
        is built from the third text node of the ``inside`` div under
        ``div#--2/div[3]``: upper-cased, split into words, with the first
        occurrence of "RESOURCES", "FOR" and "SERIES" removed.
        """
        for table in response.xpath("//div[@class='view-content']//table"):
            caption = " ".join(table.xpath("./caption//text()").extract())
            if "firmware update" not in caption.lower():
                continue

            for anchor in table.xpath("./tbody/tr/td[1]/a"):
                if not anchor.xpath(".//text()"):
                    continue

                href = anchor.xpath("./@href").extract()[0]
                label = anchor.xpath(".//text()").extract()[0]

                heading = response.xpath(
                    "//div[@id='--2']/div[3]//div[@class='inside']//text()"
                ).extract()[2]
                words = heading.upper().split()
                for noise in ("RESOURCES", "FOR", "SERIES"):
                    if noise in words:
                        words.remove(noise)

                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response)
                loader.add_value("url", href)
                loader.add_value("product", " ".join(words))
                loader.add_value("description", label)
                loader.add_value("vendor", self.name)
                yield loader.load_item()
예제 #16
0
파일: tenvis.py 프로젝트: MikimotoH/scraper
    def parse(self, response):
        """Emit a firmware item for each content link carrying an index label.

        A link qualifies when one of its text nodes contains ``"---"``; the
        integer before the first ``-`` of the first such node selects an
        entry of ``self.firmware`` holding ``(product, version)`` at
        positions 0 and 1.
        """
        for link in response.xpath("//div[@class='content']//a"):
            fragments = link.xpath(".//text()").extract()
            target = link.xpath("./@href").extract()[0]

            idx = None
            for fragment in fragments:
                if "---" not in fragment:
                    continue
                idx = int(fragment.partition("-")[0])
                break

            # NOTE(review): index 0 is treated like "no index" here --
            # verify against the contents of self.firmware.
            if idx:
                product, version = self.firmware[idx][0], self.firmware[idx][1]

                item = FirmwareLoader(item=FirmwareImage(), response=response)
                item.add_value("url", target)
                item.add_value("version", version)
                item.add_value("product", product)
                item.add_value("vendor", self.name)
                yield item.load_item()
예제 #17
0
파일: att.py 프로젝트: MikimotoH/scraper
 def parse(self, response):
     """Recursively crawl a listing for ``.bin`` / ``.upg`` firmware files.

     Parent (``..``) and root (``/``) links are skipped.  Firmware links
     are yielded as items holding just the URL and vendor; other links
     that contain a slash are followed with this method as the callback.
     """
     firmware_suffixes = (".bin", ".upg")
     ignored = ("..", "/")

     for link in response.xpath("//a/@href").extract():
         if link in ignored:
             continue
         elif link.endswith(firmware_suffixes):
             loader = FirmwareLoader(item=FirmwareImage(), response=response)
             loader.add_value("url", link)
             loader.add_value("vendor", self.name)
             yield loader.load_item()
         elif "/" in link:
             yield Request(
                 url=urlparse.urljoin(response.url, link),
                 headers={"Referer": response.url},
                 callback=self.parse)
예제 #18
0
파일: ublox.py 프로젝트: MikimotoH/scraper
    def parse_product(self, response):
        """Collect firmware links from "firmware update" tables on the page.

        Only tables under ``div.view-content`` whose caption text contains
        "firmware update" (case-insensitive) are used, and within them only
        first-column links that have text.  The product name is derived
        from a heading on the page with the filler words "RESOURCES",
        "FOR" and "SERIES" dropped (first occurrence of each).
        """
        filler_words = ["RESOURCES", "FOR", "SERIES"]
        heading_xpath = "//div[@id='--2']/div[3]//div[@class='inside']//text()"

        for entry in response.xpath("//div[@class='view-content']//table"):
            caption_parts = entry.xpath("./caption//text()").extract()
            is_firmware_table = (
                "firmware update" in " ".join(caption_parts).lower())
            if is_firmware_table:
                for link in entry.xpath("./tbody/tr/td[1]/a"):
                    if link.xpath(".//text()"):
                        url = link.xpath("./@href").extract()[0]
                        description = link.xpath(".//text()").extract()[0]

                        tokens = response.xpath(
                            heading_xpath).extract()[2].upper().split()
                        for word in filler_words:
                            if word in tokens:
                                tokens.remove(word)
                        product = " ".join(tokens)

                        item = FirmwareLoader(
                            item=FirmwareImage(), response=response)
                        item.add_value("url", url)
                        item.add_value("product", product)
                        item.add_value("description", description)
                        item.add_value("vendor", self.name)
                        yield item.load_item()
예제 #19
0
    def parse(self, response):
        """Yield a firmware item for every ``.npk`` / ``.lzb`` link on the page.

        Protocol-relative links (``//host/...``) are given an ``http:``
        scheme.  The version is the second-to-last path segment of the
        link; the product is the file name up to its last ``-``.  The date
        is searched for in all text nodes of the page.
        """
        for link in response.xpath("//a/@href").extract():
            if not link.endswith((".npk", ".lzb")):
                continue

            if link.startswith("//"):
                link = "http:" + link

            page_text = response.xpath("//text()").extract()
            segments = link.split('/')
            version, filename = segments[-2], segments[-1]

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%Y-%b-%d"])
            loader.add_value("date", loader.find_date(page_text))
            loader.add_value("url", link)
            loader.add_value("product", filename[:filename.rfind("-")])
            loader.add_value("vendor", self.name)
            loader.add_value("version", version)
            yield loader.load_item()
예제 #20
0
    def parse_download(self, response):
        """Yield firmware items from a JSON list of downloadable files.

        Only entries whose ``subFileType`` is ``"firmware"`` are used.  The
        ``releaseDate`` field is divided by 1000 before being handed to
        ``datetime.fromtimestamp`` (i.e. it is treated as milliseconds) and
        re-formatted with the loader's first ``date_fmt``.
        """
        entries = json.loads(response.body_as_unicode())

        for entry in entries:
            if entry["subFileType"] != "firmware":
                continue

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%d/%m/%y"])
            loader.add_value("version", entry["fileVersion"])

            released = datetime.datetime.fromtimestamp(
                int(entry["releaseDate"]) / 1000)
            date_format = loader.context.get("date_fmt")[0]
            loader.add_value("date", released.strftime(date_format))

            loader.add_value("description", entry["fileName"])
            loader.add_value("url", entry["downloadUrl"])
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.vendor)
            yield loader.load_item()
예제 #21
0
    def parse_product(self, response):
        """Yield a firmware item for each ``.npk`` / ``.lzb`` link on the page.

        Date and version are both searched for in the full list of the
        page's text nodes; the product is the link's file name up to its
        last ``-``.
        """
        package_suffixes = (".npk", ".lzb")
        for link in response.xpath("//a/@href").extract():
            if not link.endswith(package_suffixes):
                continue

            page_text = response.xpath("//text()").extract()
            filename = link.split("/")[-1]
            product = filename[:filename.rfind("-")]

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%Y-%b-%d"])
            loader.add_value("date", loader.find_date(page_text))
            loader.add_value("url", link)
            loader.add_value("product", product)
            loader.add_value("vendor", self.name)
            loader.add_value("version",
                             FirmwareLoader.find_version_period(page_text))
            yield loader.load_item()
예제 #22
0
    def parse(self, response):
        """Handle the three page layouts this callback encounters.

        * A page with a ``select#router`` drop-down: request the same URL
          once per option value (except empty and ``"allrouters"``) with a
          ``?router=<value>`` query, parsed again by this method.
        * Otherwise, scan the links under ``td#search_main_content`` if that
          cell exists, else those under ``div#ghfbodycontent``, and yield
          an item for each link whose ``href`` contains both
          ``"download.verizon.net"`` and ``"firmware"``.
        """
        if response.xpath("//select[@id='router']"):
            options = response.xpath(
                "//select[@id='router']/option/@value").extract()
            for product in options:
                if not product or product == "allrouters":
                    continue
                yield Request(
                    url=urlparse.urljoin(
                        response.url, "?router=%s" % (product)),
                    headers={"Referer": response.url},
                    callback=self.parse)
            return

        # Both remaining layouts are scanned the same way; only the
        # container holding the links differs.
        if response.xpath("//td[@id='search_main_content']"):
            links_xpath = "//td[@id='search_main_content']//a"
        else:
            links_xpath = "//div[@id='ghfbodycontent']//a"

        for anchor in response.xpath(links_xpath):
            if not anchor.xpath("./@href"):
                continue

            target = anchor.xpath("./@href").extract()[0]
            strings = anchor.xpath(".//text()").extract()

            if "download.verizon.net" in target and "firmware" in target:
                loader = FirmwareLoader(
                    item=FirmwareImage(), response=response)
                loader.add_value("url", target)
                loader.add_value("description", strings[0])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
예제 #23
0
    def parse_product(self, response):
        """Build one firmware item from the second-column cells of the page.

        The text of the first cell's ``<p>`` is the title (also stored as
        the description); the link in the fourth cell, joined onto
        ``self.download_path``, is the download URL.  The product name is
        derived from the title by ``split_title`` below.

        Raises:
            IndexError: if the page has fewer than four second-column cells
                or the expected ``<p>`` text / link is missing.
        """
        cells = list(response.xpath("//table//tr//td[2]"))

        title = cells[0].xpath("./p/text()").extract()[0]
        url = urllib.parse.urljoin(self.download_path,
                                   cells[3].xpath("./a/@href").extract()[0])

        def split_title(title):
            """Return ``(product, version, date)`` guessed from *title*.

            Sample titles (the CJK suffixes read "upgrade program" and
            "standard edition"):
                MR814v1_070807 升级程序
                MWR300T V1(081210)标准版
            """

            def version_and_date(token, date_len=None):
                # "<version>_<date>"; a token without '_' has no date part
                # (this used to raise IndexError).
                pieces = token.split('_')
                stamp = pieces[1][:date_len] if len(pieces) > 1 else None
                return pieces[0], stamp

            version = date = None
            words = title.split(' ')
            product = words[0]

            if len(words) == 2:
                if '_' in words[0]:
                    # e.g. "MR814v1_070807 ..."
                    version, date = version_and_date(words[0], 6)
                elif words[1][:1] in ('v', 'V'):
                    # e.g. "MWR300T V1(081210)...": left unparsed, as before.
                    # Slicing keeps an empty second word from raising.
                    pass
                else:
                    version, date = version_and_date(words[1], 6)
            elif len(words) == 3:
                version, date = version_and_date(words[1])

            # When the version does not look like "v..."/"V..." but the
            # product token embeds a lowercase 'v', split the product there.
            if version and version[0] not in ('v', 'V') and 'v' in product:
                pieces = product.split('v')
                product = pieces[0]
                version = pieces[1]

            # Strip "_<date>" and a hardware revision from the product.
            if product.count('_'):
                product = product.split('_')[0]
            if product.count('v'):
                product = product.split('v')[0]
            elif product.count('V'):
                # This branch used to split on lowercase 'v' and therefore
                # did nothing.  Only a trailing "V<digit>..." revision is
                # removed so that model names containing a 'V' stay intact.
                head, _, tail = product.rpartition('V')
                if head and tail[:1].isdigit():
                    product = head

            return product, version, date

        product, version, date = split_title(title)

        # NOTE: version and date are parsed but not stored on the item; the
        # corresponding add_value calls were already disabled.
        item = FirmwareLoader(item=FirmwareImage())
        item.add_value("url", url)
        item.add_value("product", product)
        item.add_value("vendor", self.vendor)
        item.add_value("description", title)

        yield item.load_item()
예제 #24
0
    def parse_product(self, response):
        """Yield one item per firmware entry, paired with its source archive.

        Firmware entries live under ``div#div_type_20``.  For each one the
        version is parsed from its ``<p>`` text; if a version is found, the
        source-code entries under ``div#div_type_30`` are scanned and the
        link of the last entry whose ``<p>`` text contains that version is
        stored as ``sdk``.
        """
        # types: firmware = 20, gpl source = 30, bios = 3
        firmware_xpath = (
            "//div[@id='div_type_20']/div[@id='download-os-answer-table']")
        source_xpath = (
            "//div[@id='div_type_30']/div[@id='download-os-answer-table']")
        link_xpath = "./table//tr[3]//a/@href"

        for release in response.xpath(firmware_xpath):
            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response, date_fmt=["%Y/%m/%d"])

            version = FirmwareLoader.find_version_period(
                release.xpath("./p//text()").extract())

            # grab first download link (e.g. DLM instead of global or p2p)
            href = release.xpath(link_xpath).extract()[0]

            # attempt to find matching source code entry
            sdk_url = None
            if version:
                for candidate in response.xpath(source_xpath):
                    heading = "".join(
                        candidate.xpath("./p//text()").extract())
                    if version in heading:
                        sdk_url = candidate.xpath(link_xpath).extract()[0]

            date_text = release.xpath(
                "./table//tr[2]/td[1]//text()").extract()
            description = " ".join(
                release.xpath("./table//tr[1]//td[1]//text()").extract())

            loader.add_value("version", version)
            loader.add_value("date", loader.find_date(date_text))
            loader.add_value("description", description)
            loader.add_value("url", href)
            loader.add_value("sdk", sdk_url)
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
예제 #25
0
    def parse_product(self, response):
        """Yield firmware items scraped from a ``var commonInfo`` script.

        The response text is only processed when it starts with
        ``var commonInfo``.  Product ids, titles, ROM URLs and update dates
        are pulled out with regular expressions and combined positionally,
        one item per record.  If the fields occur a different number of
        times, only as many items as the shortest list are produced.
        """
        import re

        js = response.text
        if not js.startswith("var commonInfo"):
            return

        p_product = u"id:\"(?P<product>.*?)\""
        p_description = u"title:\"(?P<description>.*?)\""
        p_url = u"romUrl:\"(?P<url>.*?)\""
        p_date = u"updateDate:\"(?P<date>.*?)\""

        # The product list used to be built from the romVersions pattern by
        # mistake, so every item's product held a version string.
        products = re.findall(p_product, js)
        descriptions = re.findall(p_description, js)
        urls = re.findall(p_url, js)
        dates = re.findall(p_date, js)

        # NOTE(review): the original also matched romVersions:"..." but never
        # stored it on the item; consider adding it as "version".
        records = zip(products, urls, descriptions, dates)
        for product, url, description, date in records:
            item = FirmwareLoader(item=FirmwareImage(), response=response)
            item.add_value("url", url)
            item.add_value("product", product)
            item.add_value("description", description)
            item.add_value("date", date)
            item.add_value("vendor", self.name)
            yield item.load_item()
예제 #26
0
    def parse_model_files(self, response):
        """Yield a firmware item for each entry of ``downloads.firmware``.

        The response body is parsed as JSON.  When the ``downloads`` or
        ``firmware`` key is missing, an info message is logged and nothing
        is yielded.  The ``global`` download link of each entry is used;
        links without an ``http(s)://`` prefix are joined onto
        ``https://``.
        """
        meta = response.meta

        # Due to Python2 and unicode objects, we're using response body here.
        # Issues are from the 'remarks' fields.
        try:
            firmware_map = json.loads(response.body)['downloads']['firmware']
        except KeyError:
            logging.info("No downloadable firmware for %s", meta)
            return

        for fw_info in firmware_map.values():
            # options: {'global', 'europe', 'usa'}
            link = fw_info['links']['global']
            has_scheme = link.startswith((u"https://", u"http://"))
            if not has_scheme:
                link = urlparse.urljoin(u"https://", link)

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt="%Y-%m-%d")
            loader.add_value('product', meta['name'])
            loader.add_value('vendor', self.name)
            loader.add_value('description', fw_info['releasenote'])
            loader.add_value('date', fw_info['published_at'])
            loader.add_value('version', fw_info['version'])
            loader.add_value('url', link)
            yield loader.load_item()
예제 #27
0
    def parse(self, response):
        """Crawl the download menu, following categories and emitting firmware.

        Links without ``_a=download`` in their ``href`` are followed with
        this method as callback, carrying the link text (stripped and split
        on spaces) as ``meta["product"]``.  Download links whose text
        mentions "firmware" or "f/w" become items; version and date are
        searched for in the enclosing cell's text.
        """
        cells = response.xpath(
            "//div[@class='menu2']//table//table//table[2]//td[1]//td[2]")
        for cell in cells:
            cell_text = cell.xpath(".//text()").extract()

            for anchor in cell.xpath(".//a"):
                href = anchor.xpath("./@href").extract()[0]
                label = anchor.xpath(".//text()").extract()[0]

                if "_a=download" in href:
                    lowered = label.lower()
                    if "firmware" in lowered or "f/w" in lowered:
                        loader = FirmwareLoader(
                            item=FirmwareImage(),
                            response=response,
                            date_fmt=["%m/%d/%Y", "%m/%d/%y"])
                        loader.add_value(
                            "version", FirmwareLoader.find_version(cell_text))
                        loader.add_value("date", loader.find_date(cell_text))
                        loader.add_value("description", label)
                        loader.add_value("url", href)
                        loader.add_value("product", response.meta["product"])
                        loader.add_value("vendor", self.name)
                        yield loader.load_item()
                else:
                    yield Request(
                        url=urlparse.urljoin(response.url, href),
                        headers={"Referer": response.url},
                        meta={"product": label.strip().split(' ')},
                        callback=self.parse)
예제 #28
0
    def parse_product(self, response):
        """Emit firmware items for the ``.npk`` and ``.lzb`` links of a page.

        For each matching link, the page's complete list of text nodes is
        used to look up both the release date and the version.  The product
        is the file name truncated at its last ``-``.
        """
        for href in response.xpath("//a/@href").extract():
            is_package = href.endswith(".npk") or href.endswith(".lzb")
            if is_package:
                all_text = response.xpath("//text()").extract()
                name = href.split("/")[-1]
                cut = name.rfind("-")

                item = FirmwareLoader(
                    item=FirmwareImage(),
                    response=response,
                    date_fmt=["%Y-%b-%d"])
                item.add_value("date", item.find_date(all_text))
                item.add_value("url", href)
                item.add_value("product", name[0:cut])
                item.add_value("vendor", self.name)
                item.add_value(
                    "version", FirmwareLoader.find_version_period(all_text))
                yield item.load_item()
예제 #29
0
파일: xerox.py 프로젝트: firmadyne/scraper
    def parse_download(self, response):
        """Yield a firmware item for each record in category bucket 7.

        The product is the first text node of the navigation header with
        " Support & Drivers" removed.  Date and version are both parsed
        from the ``<strong>`` texts of the record's ``dateVersion`` list.
        In the download link, ``file-download`` is replaced by
        ``file-redirect``.
        """
        record_xpath = ("//li[@class='categoryBucket categoryBucketId-7']"
                        "//li[@class='record ']")
        stamp_xpath = ".//ul[@class='dateVersion']//strong/text()"

        for record in response.xpath(record_xpath):
            header = response.xpath(
                "//div[@class='prodNavHeaderBody']//text()").extract()[0]
            product = header.replace(" Support & Drivers", "")

            date_strings = record.xpath(stamp_xpath).extract()
            version_strings = record.xpath(stamp_xpath).extract()

            raw_href = record.xpath(".//a/@href").extract()[0]
            href = raw_href.replace("file-download", "file-redirect")
            label = record.xpath(".//a//text()").extract()[0]

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%b %d, %Y"])
            loader.add_value("url", href)
            loader.add_value("product", product)
            loader.add_value("date", loader.find_date(date_strings))
            loader.add_value("description", label)
            loader.add_value(
                "version", loader.find_version_period(version_strings))
            loader.add_value("vendor", self.name)
            yield loader.load_item()
예제 #30
0
    def parse_product(self, response):
        """Build one firmware item from the page's ``a#downLoadHref`` link.

        The download URL is ``self.firmware_url`` followed by the link's
        ``href``.  Date, description and product are taken from
        ``response.meta``.

        Raises:
            IndexError: if the page has no ``a#downLoadHref`` link.
        """
        url = self.firmware_url + response.xpath(
            '//a[@id="downLoadHref"]/@href').extract()[0]

        item = FirmwareLoader(item=FirmwareImage(), response=response)
        # The date arrives through meta like description and product, so it
        # is stored as a value.  It used to be passed to add_xpath, which
        # evaluates its argument as an XPath expression against the page.
        # NOTE(review): presumably meta['date'] is a literal date string --
        # confirm against the callback that issues this request.
        item.add_value("date", response.meta['date'])
        item.add_value("description", response.meta['description'])
        item.add_value("url", url)
        item.add_value("product", response.meta["product"])
        item.add_value("vendor", self.name)
        yield item.load_item()
예제 #31
0
    def parse_download(self, response):
        """Turn the firmware entries of a JSON file listing into items.

        The response body is decoded as JSON and iterated; entries with a
        ``subFileType`` other than ``"firmware"`` are ignored.  The release
        timestamp is divided by 1000, converted with
        ``datetime.fromtimestamp`` and formatted using the first entry of
        the loader's ``date_fmt`` context value.
        """
        for record in json.loads(response.body_as_unicode()):
            if record["subFileType"] == "firmware":
                item = FirmwareLoader(
                    item=FirmwareImage(),
                    response=response,
                    date_fmt=["%d/%m/%y"])
                item.add_value("version", record["fileVersion"])

                timestamp = int(record["releaseDate"]) / 1000
                formatted = datetime.datetime.fromtimestamp(
                    timestamp).strftime(item.context.get("date_fmt")[0])
                item.add_value("date", formatted)

                item.add_value("description", record["fileName"])
                item.add_value("url", record["downloadUrl"])
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.vendor)
                yield item.load_item()
예제 #32
0
    def parse(self, response):
        """Crawl a directory listing and emit items for ``.gz`` images.

        Links containing ``..`` are ignored, links ending in ``/`` are
        followed recursively, and links ending in ``.gz`` (without
        ``.iso`` in the href) become firmware items.  The link text is
        stripped of its extensions and split on ``-``; the version found
        among those tokens, plus any BETA/RC qualifier, forms the version
        and the remaining tokens form the product.

        Anchors without text or ``href`` are skipped.
        """
        for link in response.xpath("//a"):
            texts = link.xpath(".//text()").extract()
            hrefs = link.xpath(".//@href").extract()
            if not texts or not hrefs:
                # Previously raised IndexError and aborted the whole page.
                continue
            text = texts[0]
            href = hrefs[0]

            if ".." in href:
                continue
            elif href.endswith('/'):
                yield Request(
                    url=urlparse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    callback=self.parse)
            elif href.endswith(".gz") and ".iso" not in href:
                # strip off multiple file extensions
                basename = os.path.splitext(text)[0]
                while ".img" in basename or ".iso" in basename:
                    basename = os.path.splitext(basename)[0]

                basename = basename.split("-")
                version = FirmwareLoader.find_version_period(basename)

                # attempt to parse filename and generate product/version
                # strings
                remove = [version] if version else []
                qualifiers = []
                for i, token in enumerate(basename):
                    if "BETA" in token:
                        # The token after BETA is folded into the qualifier;
                        # tolerate BETA being the last token.
                        following = (
                            basename[i + 1] if i + 1 < len(basename) else "")
                        qualifiers.append("%s%s" % (token, following))
                        remove.append(token)
                        if following:
                            remove.append(following)
                    elif "RC" in token:
                        qualifiers.append(token)
                        remove.append(token)
                    elif "RELEASE" in token:
                        remove.append(token)

                if qualifiers:
                    # Joining avoids "None += str" when no version was found.
                    version = "-".join(
                        ([version] if version else []) + qualifiers)

                basename = [x for x in basename if x not in remove]

                item = FirmwareLoader(
                    item=FirmwareImage(), response=response,
                    date_fmt=["%d-%b-%Y"])
                item.add_value("version", version)
                item.add_value("url", href)
                item.add_value("date", item.find_date(
                    link.xpath("following::text()").extract()))
                item.add_value("product", "-".join(basename))
                item.add_value("vendor", self.name)
                yield item.load_item()
예제 #33
0
    def parse(self, response):
        """Follow category links and yield items for firmware downloads.

        Every link found in the nested menu cells is handled in one of two
        ways: a link whose ``href`` lacks ``_a=download`` is requested and
        parsed by this method again, with the split link text passed on as
        ``meta["product"]``; a download link whose text contains
        "firmware" or "f/w" (case-insensitive) yields an item.
        """
        menu_xpath = (
            "//div[@class='menu2']//table//table//table[2]//td[1]//td[2]")
        for entry in response.xpath(menu_xpath):
            description_text = entry.xpath(".//text()").extract()

            for link in entry.xpath(".//a"):
                target = link.xpath("./@href").extract()[0]
                caption = link.xpath(".//text()").extract()[0]

                if "_a=download" not in target:
                    yield Request(
                        url=urllib.parse.urljoin(response.url, target),
                        headers={"Referer": response.url},
                        meta={"product": caption.strip().split(' ')},
                        callback=self.parse)
                    continue

                caption_lower = caption.lower()
                if not ("firmware" in caption_lower
                        or "f/w" in caption_lower):
                    continue

                item = FirmwareLoader(item=FirmwareImage(),
                                      response=response,
                                      date_fmt=["%m/%d/%Y", "%m/%d/%y"])
                item.add_value("version",
                               FirmwareLoader.find_version(description_text))
                item.add_value("date", item.find_date(description_text))
                item.add_value("description", caption)
                item.add_value("url", target)
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()
예제 #34
0
    def parse_product(self, response):
        """Yield a firmware item for each link in the upgrade-software tab.

        Only tab sections whose <h3> heading contains the marker string are
        scanned. The product name is taken from ``response.meta["product"]``.
        """
        for tab in response.xpath("//ul[@id='tab_conbox']/li"):
            heading = "".join(tab.xpath("./h3//text()").extract())
            if u"升级软件" not in heading:
                continue

            for anchor in tab.xpath(".//dd/a"):
                pieces = anchor.xpath(".//text()").extract()
                link = anchor.xpath("./@href").extract()[0]
                summary = pieces[0]

                # A single text node is split into words and reversed,
                # because the hardware revision can precede the firmware
                # version, e.g. "FH330升级软件(V1.0) V1.0.0.24_CN".
                if len(pieces) == 1:
                    pieces = pieces[0].split()[::-1]

                loader = FirmwareLoader(
                    item=FirmwareImage(), response=response)
                loader.add_value(
                    "version", FirmwareLoader.find_version_period(pieces))
                loader.add_value("url", link)
                loader.add_value("product", response.meta["product"])
                loader.add_value("description", summary)
                loader.add_value("vendor", self.vendor)
                yield loader.load_item()
예제 #35
0
파일: zyxel.py 프로젝트: MikimotoH/scraper
    def parse_product(self, response):
        """Emit one firmware item per firmware entry of the download table.

        Rows are visited in reverse document order. A row whose type text
        mentions "mib" updates the MIB link attached to every firmware item
        yielded after it. Rows whose version cell holds a <select> describe
        several releases and are expanded one <option> at a time, indexing
        the type/date/version/link lists with the option position.
        Nothing is yielded for an empty response body.
        """
        if not response.body:
            return

        def build_item(version, date, url, mib_url):
            # Item assembly shared by both row layouts.
            loader = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%m-%d-%Y"])
            loader.add_value("version", version)
            loader.add_value("date", date)
            loader.add_value("url", url)
            loader.add_value("product", response.meta["product"])
            loader.add_value("mib", mib_url)
            loader.add_value("vendor", self.name)
            return loader.load_item()

        current_mib = None
        for row in reversed(response.xpath("//table/tbody/tr")):
            if row.xpath("./td[contains(@class, 'versionTd')]/select"):
                option_total = len(row.xpath(
                    "./td[contains(@class, 'versionTd')]/select/option"))

                for idx in range(option_total):
                    kind = row.xpath(
                        "./td[contains(@class, 'typeTd')]/span/text()").extract()[idx].lower()
                    if "firmware" not in kind:
                        continue

                    released = row.xpath(
                        "./td[contains(@class, 'dateTd')]/span/text()").extract()[idx]
                    version = row.xpath(
                        "./td[contains(@class, 'versionTd')]/select/option/text()").extract()[idx]
                    link = row.xpath(
                        "./td[contains(@class, 'downloadTd')]/div/a[1]/@data-filelink").extract()[idx]
                    yield build_item(version, released, link, current_mib)
                continue

            # Single-release row: the second text node of the type cell
            # carries the file type.
            kind = row.xpath(
                "./td[contains(@class, 'typeTd')]//text()").extract()[1].lower()

            if "firmware" in kind:
                released = row.xpath(
                    "./td[contains(@class, 'dateTd')]//text()").extract()
                version = row.xpath(
                    "./td[contains(@class, 'versionTd')]//text()").extract()
                link = row.xpath(
                    "./td[contains(@class, 'downloadTd')]//a/@data-filelink").extract()[0]
                yield build_item(version, released, link, current_mib)
            elif "mib" in kind:
                current_mib = row.xpath(
                    "./td[contains(@class, 'downloadTd')]//a/@href").extract()[0]
예제 #36
0
파일: dlink.py 프로젝트: MikimotoH/scraper
    def parse_json(self, response):
        """Turn the JSON file listing into firmware items.

        Items and their files are walked in reverse order. A file counts as
        firmware when its ``filetypename`` is "firmware" (case-insensitive)
        or its ``isFirmF`` flag equals "1". A non-firmware file whose name
        contains "MIB" updates the MIB url attached to the firmware items
        yielded after it. ``revision`` and ``product`` come from
        ``response.meta``.
        """
        payload = json.loads(response.body_as_unicode())
        mib_url = None

        for group in reversed(payload["item"]):
            for entry in reversed(group["file"]):
                is_firmware = (entry["filetypename"].lower() == "firmware"
                               or entry["isFirmF"] == "1")

                if is_firmware:
                    loader = FirmwareLoader(item=FirmwareImage(),
                                            response=response,
                                            date_fmt=["%m/%d/%y"])
                    loader.add_value(
                        "version",
                        FirmwareLoader.find_version_period([entry["name"]]))
                    loader.add_value("date", entry["date"])
                    loader.add_value("description", entry["name"])
                    loader.add_value("url", entry["url"])
                    loader.add_value("build", response.meta["revision"])
                    loader.add_value("product", response.meta["product"])
                    loader.add_value("vendor", self.name)
                    loader.add_value("mib", mib_url)
                    yield loader.load_item()
                elif "MIB" in entry["name"]:
                    mib_url = entry["url"]
예제 #37
0
    def parse(self, response):
        """Yield a firmware item for every ``.img`` link on the page.

        The version is the text after the last "-" of the file name, with
        everything from the final ".img" onwards removed. Both product and
        vendor are set to the spider name.
        """
        image_links = (target
                       for target in response.xpath("//a/@href").extract()
                       if target.endswith(".img"))

        for target in image_links:
            tail = target.split("/")[-1].split("-")[-1]

            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", target)
            loader.add_value("product", self.name)
            loader.add_value("vendor", self.name)
            loader.add_value("version", tail[0: tail.rfind(".img")])
            yield loader.load_item()
예제 #38
0
    def parse(self, response):
        """Walk a directory-style listing and emit firmware items.

        The software family is the first of DSMUC, DSM, VSM, VSF, SRM found
        in ``response.url``; rows on pages matching none of them are skipped.
        Directory links are followed with ``build`` and ``version`` passed
        along in ``meta``; any other link not ending in .txt/.md5/.torrent
        becomes a firmware item.
        """
        for row in response.xpath("//table/tr[position() > 3]"):
            if not row.xpath("./td[2]/a"):
                continue

            name = row.xpath("./td[2]/a//text()").extract()[0]
            href = row.xpath("./td[2]/a/@href").extract()[0]
            date = row.xpath("./td[3]//text()").extract()[0]

            # Order matters: "DSMUC" must be tested before its prefix "DSM".
            software = next(
                (tag for tag in ("DSMUC", "DSM", "VSM", "VSF", "SRM")
                 if tag in response.url),
                None)
            if software is None:
                continue  # should not happen :-)

            if href.endswith('/'):
                detected = FirmwareLoader.find_version_period([name])
                # A version inherited from the parent page wins over one
                # detected in this directory name.
                version = response.meta.get("version", detected)
                # A directory name without a version is taken as the build
                # (minus its trailing slash).
                build = None if detected else name[0: -1]

                yield Request(
                    url=urllib.parse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    meta={"build": build, "version": version},
                    callback=self.parse)
                continue

            if href.lower().endswith((".txt", ".md5", ".torrent")):
                continue

            parts = os.path.splitext(name)[0].split("_")
            if software in parts:
                if response.meta["build"] in parts:
                    parts.remove(response.meta["build"])
                parts.remove(software)
                product = " ".join(parts)
            else:
                # usually "synology_x86_ds13_1504"
                product = parts[-2]

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
            loader.add_value("build", response.meta["build"])
            loader.add_value("version", response.meta["version"])
            if software == "DSM":
                loader.add_value("mib", "https://global.download.synology.com/download/Document/Software/"
                                        "DeveloperGuide/Firmware/DSM/All/enu/Synology_MIB_File.zip")
            loader.add_value("url", href)
            loader.add_value("date", date)
            loader.add_value("product", product)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
예제 #39
0
    def parse_product(self, response):
        """Expand the hardware-revision dropdown, then emit firmware items.

        When the page has a revision dropdown and no ``build`` was passed in
        ``meta``, each dropdown entry is re-requested through this callback
        with its text as ``build``. Otherwise every table in the firmware
        section yields one item, and the first link of the GPL-code section
        (if any) is attached as ``sdk``.
        """
        has_dropdown = response.xpath("//dl[@id='dlDropDownBox']")

        if has_dropdown and "build" not in response.meta:
            for option in response.xpath("//dl[@id='dlDropDownBox']//li/a"):
                target = option.xpath("./@href").extract()[0]
                label = option.xpath(".//text()").extract()[0]

                yield Request(
                    url=urlparse.urljoin(response.url, target),
                    meta={"product": response.meta["product"], "build": label},
                    headers={"Referer": response.url},
                    callback=self.parse_product,
                )
            return

        gpl_links = response.xpath(
            "//div[@id='content_gpl_code']//a/@href").extract()
        sdk = gpl_links[0] if gpl_links else None

        for table in response.xpath("//div[@id='content_firmware']//table"):
            target = table.xpath("./tbody/tr[1]/th[1]//a/@href").extract()[0]
            label = table.xpath("./tbody/tr[1]/th[1]//a//text()").extract()[0]
            released = table.xpath("./tbody/tr[1]/td[1]//text()").extract()

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%d/%m/%y"])
            loader.add_value("url", target)
            loader.add_value("date", loader.find_date(released))
            loader.add_value("description", label)
            loader.add_value("product", response.meta["product"])
            loader.add_value("build", response.meta.get("build"))
            loader.add_value("vendor", self.vendor)
            loader.add_value("sdk", sdk)
            yield loader.load_item()
예제 #40
0
파일: qnap.py 프로젝트: MikimotoH/scraper
    def parse_product(self, response):
        """Yield one firmware item per table row that offers a download link.

        Version and build are both searched in the second cell, the date in
        the fourth, and the first link of the fifth cell is the download url.
        The description is the third text node of the first cell, stripped.
        Rows without any link are skipped.
        """
        rows = response.xpath(
            "//div[@class='main_data_block']//table/tr[position() > 1]")

        for row in rows:
            title_nodes = row.xpath("./td[1]//text()").extract()
            edition = row.xpath("./td[2]//text()").extract()
            released = row.xpath("./td[4]//text()").extract()
            links = row.xpath("./td[5]//a/@href").extract()

            if not links:
                continue

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%Y/%m/%d"])
            loader.add_value(
                "version", FirmwareLoader.find_version_period(edition))
            loader.add_value("build", FirmwareLoader.find_build(edition))
            loader.add_value("url", links[0])
            loader.add_value("date", loader.find_date(released))
            loader.add_value("description", title_nodes[2].strip())
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
예제 #41
0
    def parse(self, response):
        """Expand the router selector, or collect firmware links from a page.

        Pages with a ``router`` <select> only schedule one request per
        option (empty values and "allrouters" excluded). Other pages are
        scanned for links, inside the search-results cell when present and
        inside the generic body container otherwise. A link becomes an item
        when its href contains both "download.verizon.net" and "firmware".
        """
        if response.xpath("//select[@id='router']"):
            models = response.xpath(
                "//select[@id='router']/option/@value").extract()
            for model in models:
                if not model or model == "allrouters":
                    continue
                yield Request(
                    url=urllib.parse.urljoin(
                        response.url, "?router=%s" % (model)),
                    headers={"Referer": response.url},
                    callback=self.parse)
            return

        # Both page layouts are handled identically; only the container
        # holding the links differs.
        if response.xpath("//td[@id='search_main_content']"):
            container = "//td[@id='search_main_content']"
        else:
            container = "//div[@id='ghfbodycontent']"

        for anchor in response.xpath(container + "//a"):
            if not anchor.xpath("./@href"):
                continue

            target = anchor.xpath("./@href").extract()[0]
            caption = anchor.xpath(".//text()").extract()

            if "download.verizon.net" not in target or "firmware" not in target:
                continue

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response)
            loader.add_value("url", target)
            loader.add_value("description", caption[0])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
예제 #42
0
    def parse_product(self, response):
        """Yield a firmware item for each linked row of the downloads table.

        Nothing is yielded unless the page carries the firmware heading
        (``<h3 class="firm">``). For every row of the first table under
        ``#tab-downloads`` the link (2nd cell), version (5th cell) and
        description (7th cell) are read; the product is the part of the url
        before its first "-". Rows without a link are skipped, and a missing
        version or description cell is passed to the loader as None instead
        of aborting the whole page.

        The 4th (date) cell is not stored: the loader is created without a
        date format here, exactly as before.
        """
        # Function-scope import so the block does not depend on the
        # file's import section.
        import logging

        # <h3 class="firm">Firmware</h3>
        if not response.xpath('//h3[@class="firm"]').extract():
            return

        log = logging.getLogger(__name__)

        for tr in response.xpath(
                '//*[@id="tab-downloads"]/table[1]/tbody/tr'):
            # Row dump kept for debugging, but routed through logging
            # instead of a Python 2 print statement.
            log.debug(tr.extract())

            hrefs = tr.xpath("./td[2]/a/@href").extract()
            if not hrefs:
                # Header or spacer row: nothing to download.
                continue

            url = hrefs[0]
            versions = tr.xpath("./td[5]/text()").extract()
            descriptions = tr.xpath("./td[7]/text()").extract()
            product = url.split('-')[0]

            item = FirmwareLoader(item=FirmwareImage(), response=response)

            item.add_value("version", versions[0] if versions else None)
            item.add_value(
                "description", descriptions[0] if descriptions else None)
            item.add_value("url", url)
            item.add_value("product", product)
            item.add_value("vendor", self.name)
            yield item.load_item()
예제 #43
0
    def parse_download(self, response):
        """Yield a firmware item for each download box that mentions firmware.

        A box qualifies when the word "firmware" appears anywhere in its
        text (case-insensitive). The version is searched in the first
        ``maindescription`` entry and the date in the second. The download
        url is the second single-quote-delimited piece of the first link's
        ``onclick`` attribute, with the confirmation query suffix appended.
        """
        for box in response.xpath("//div[@class='downloadtable']"):
            everything = " ".join(box.xpath(".//text()").extract())
            if "firmware" not in everything.lower():
                continue

            title = box.xpath(
                ".//li[@class='maindescription' and position() = 1]//text()").extract()
            released = box.xpath(
                ".//li[@class='maindescription' and position() = 2]//text()").extract()
            onclick = box.xpath(
                ".//li[@class='maindescription']//a/@onclick").extract()[0]
            target = onclick.split('\'')[1] + \
                "&button=Continue+with+Download&Continue=yes"

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%m/%d/%Y"])
            loader.add_value("url", target)
            loader.add_value("product", response.meta["product"])
            loader.add_value("date", loader.find_date(released))
            loader.add_value("version", FirmwareLoader.find_version(title))
            loader.add_value("vendor", self.name)
            yield loader.load_item()
예제 #44
0
    def parse_product(self, response):
        """Emit one firmware item per firmware entry of the download table.

        Table rows are processed bottom-up. Whenever a row's type text
        mentions "mib", its link becomes the MIB url attached to the
        firmware items produced afterwards. A row whose version cell holds a
        <select> lists several releases; each <option> position is used to
        index the row's type, date, version and link lists. An empty
        response body produces nothing.
        """
        if not response.body:
            return

        def make_item(version, date, url, mib_url):
            # One place for the field order shared by both row layouts.
            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%m-%d-%Y"])
            loader.add_value("version", version)
            loader.add_value("date", date)
            loader.add_value("url", url)
            loader.add_value("product", response.meta["product"])
            loader.add_value("mib", mib_url)
            loader.add_value("vendor", self.name)
            return loader.load_item()

        mib_url = None
        for row in reversed(response.xpath("//table/tbody/tr")):
            multi_release = row.xpath(
                "./td[contains(@class, 'versionTd')]/select")

            if multi_release:
                options = row.xpath(
                    "./td[contains(@class, 'versionTd')]/select/option")

                for position in range(len(options)):
                    kind = row.xpath(
                        "./td[contains(@class, 'typeTd')]/span/text()"
                    ).extract()[position].lower()
                    if "firmware" not in kind:
                        continue

                    released = row.xpath(
                        "./td[contains(@class, 'dateTd')]/span/text()"
                    ).extract()[position]
                    version = row.xpath(
                        "./td[contains(@class, 'versionTd')]/select/option/text()"
                    ).extract()[position]
                    link = row.xpath(
                        "./td[contains(@class, 'downloadTd')]/div/a[1]/@data-filelink"
                    ).extract()[position]
                    yield make_item(version, released, link, mib_url)
                continue

            # Single-release layout: the file type is the second text node
            # of the type cell.
            kind = row.xpath(
                "./td[contains(@class, 'typeTd')]//text()").extract()[1].lower()

            if "firmware" in kind:
                released = row.xpath(
                    "./td[contains(@class, 'dateTd')]//text()").extract()
                version = row.xpath(
                    "./td[contains(@class, 'versionTd')]//text()").extract()
                link = row.xpath(
                    "./td[contains(@class, 'downloadTd')]//a/@data-filelink"
                ).extract()[0]
                yield make_item(version, released, link, mib_url)
            elif "mib" in kind:
                mib_url = row.xpath(
                    "./td[contains(@class, 'downloadTd')]//a/@href"
                ).extract()[0]
예제 #45
0
파일: openwrt.py 프로젝트: lxonz/IoT-vulhub
    def parse_url(self, response):
        """Recurse through a directory listing and emit firmware images.

        Directory links (href ending in "/") are followed with the
        directory name appended to the product string carried in
        ``meta["product"]``; directories whose text contains "package/" are
        not entered. Links ending in a known firmware suffix yield an item
        whose date is searched in the text following the link.

        Links are skipped when they have no href, when the href contains
        "..", or - for directories - when the anchor has no direct text
        (this covers the ``<a href="/"><em>(root)</em></a>`` entry).
        Previously a missing href or text raised TypeError and aborted the
        whole page.
        """
        firmware_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

        for link in response.xpath("//a"):
            text = link.xpath("text()").extract_first()
            href = link.xpath("@href").extract_first()

            # extract_first() returns None for anchors without href.
            if not href or ".." in href:
                continue

            if href.endswith('/'):
                # The directory name is derived from the link text, so a
                # text-less directory link cannot be turned into a product.
                if text is None or "package/" in text:
                    continue

                dirname = text[0:-1]
                if "product" in response.meta:
                    product = "%s-%s" % (response.meta["product"], dirname)
                else:
                    product = dirname

                yield Request(url=urllib.parse.urljoin(response.url, href),
                              headers={"Referer": response.url},
                              meta={
                                  "version": response.meta["version"],
                                  "product": product
                              },
                              callback=self.parse_url)
            elif href.endswith(firmware_suffixes):
                item = FirmwareLoader(item=FirmwareImage(),
                                      response=response,
                                      date_fmt=["%d-%b-%Y"])
                item.add_value("version", response.meta["version"])
                item.add_value("url", href)
                item.add_value(
                    "date",
                    item.find_date(link.xpath("following::text()").extract()))
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()
예제 #46
0
    def parse(self, response):
        """Walk a directory-style listing for DSM and VSFirmware downloads.

        Rows without a link in the second cell are ignored. On pages whose
        url contains "DSM", directories are followed with an inherited or
        detected version (and the directory name as build when no version is
        found in it); on "VSFirmware" pages the directory name is split on
        "-" into version and build. Any non-directory link that is not a
        .txt/.md5/.torrent file becomes a firmware item. Pages matching
        neither url marker yield nothing.
        """
        ignored_suffixes = (".txt", ".md5", ".torrent")

        for row in response.xpath("//table/tr[position() > 3]"):
            if not row.xpath("./td[2]/a"):
                continue

            name = row.xpath("./td[2]/a//text()").extract()[0]
            href = row.xpath("./td[2]/a/@href").extract()[0]
            date = row.xpath("./td[3]//text()").extract()[0]

            is_directory = href.endswith('/')
            is_payload = (not is_directory
                          and not href.lower().endswith(ignored_suffixes))

            if "DSM" in response.url:
                if is_directory:
                    detected = FirmwareLoader.find_version_period([name])
                    # An inherited version takes precedence over one
                    # detected in this directory name.
                    version = response.meta.get("version", detected)
                    build = None if detected else name[0: -1]

                    yield Request(
                        url=urlparse.urljoin(response.url, href),
                        headers={"Referer": response.url},
                        meta={"build": build, "version": version},
                        callback=self.parse)
                elif is_payload:
                    parts = os.path.splitext(name)[0].split("_")

                    if "DSM" in parts:
                        if response.meta["build"] in parts:
                            parts.remove(response.meta["build"])
                        parts.remove("DSM")
                        product = " ".join(parts)
                    else:
                        product = parts[-2]

                    loader = FirmwareLoader(
                        item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
                    loader.add_value("build", response.meta["build"])
                    loader.add_value("version", response.meta["version"])
                    loader.add_value(
                        "mib", "http://dedl.synology.com/download/Document/MIBGuide/Synology_MIB_File.zip")
                    loader.add_value("url", href)
                    loader.add_value("date", date)
                    loader.add_value("product", product)
                    loader.add_value("vendor", self.name)
                    yield loader.load_item()
            elif "VSFirmware" in response.url:
                if is_directory:
                    # Directory names look like "<version>-<build>/".
                    version, build = name[0: -1].split("-")

                    yield Request(
                        url=urlparse.urljoin(response.url, href),
                        headers={"Referer": response.url},
                        meta={"build": build, "version": version},
                        callback=self.parse)
                elif is_payload:
                    parts = os.path.splitext(name)[0].split("_")

                    loader = FirmwareLoader(
                        item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
                    loader.add_value("build", response.meta["build"])
                    loader.add_value("version", response.meta["version"])
                    loader.add_value("url", href)
                    loader.add_value("date", date)
                    loader.add_value("product", parts[0])
                    loader.add_value("vendor", self.name)
                    yield loader.load_item()
예제 #47
0
    def parse_json(self, response):
        """Drive the JSON download API: products -> downloads -> optional SDK.

        The same callback handles three kinds of response, told apart by
        what is present in the payload and in ``response.meta``:

        * a payload with ``products`` schedules one request per product
          slug;
        * a response whose meta carries ``url`` is the answer to an SDK
          lookup: the firmware item is built from the meta fields and the
          payload's ``download_url`` is stored as ``sdk``;
        * a response whose meta carries only ``product`` lists downloads:
          firmware entries with an ``sdk__id`` trigger the SDK lookup,
          the others are emitted directly.

        The body is decoded through ``response.text`` rather than the
        deprecated ``body_as_unicode()``.
        """
        json_response = json.loads(response.text)
        ajax_headers = {
            "Referer": response.url,
            "X-Requested-With": "XMLHttpRequest"
        }

        if "products" in json_response:
            for product in json_response["products"]:
                yield Request(url=urllib.parse.urljoin(
                    response.url, "?product=%s" % (product["slug"])),
                              headers=dict(ajax_headers),
                              meta={"product": product["slug"]},
                              callback=self.parse_json)

        if "url" in response.meta:
            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%Y-%m-%d"])
            item.add_value("url", response.meta["url"])
            item.add_value("product", response.meta["product"])
            item.add_value("date", response.meta["date"])
            item.add_value("description", response.meta["description"])
            item.add_value("build", response.meta["build"])
            item.add_value("version", response.meta["version"])
            item.add_value("sdk", json_response["download_url"])
            item.add_value("vendor", self.name)
            yield item.load_item()

        elif "product" in response.meta:
            for entry in json_response["downloads"]:
                if entry["category__slug"] != "firmware":
                    continue

                if entry["sdk__id"]:
                    # Resolve the SDK download url first; the firmware
                    # fields travel along in meta and the item is emitted
                    # by the "url" branch above.
                    yield Request(url=urllib.parse.urljoin(
                        response.url,
                        "?gpl=%s&eula=True" % (entry["sdk__id"])),
                                  headers=dict(ajax_headers),
                                  meta={
                                      "product": response.meta["product"],
                                      "date": entry["date_published"],
                                      "build": entry["build"],
                                      "url": entry["file_path"],
                                      "version": entry["version"],
                                      "description": entry["name"]
                                  },
                                  callback=self.parse_json)
                else:
                    item = FirmwareLoader(item=FirmwareImage(),
                                          response=response,
                                          date_fmt=["%Y-%m-%d"])
                    item.add_value("url", entry["file_path"])
                    item.add_value("product", response.meta["product"])
                    item.add_value("date", entry["date_published"])
                    item.add_value("description", entry["name"])
                    item.add_value("build", entry["build"])
                    item.add_value("version", entry["version"])
                    item.add_value("vendor", self.name)
                    yield item.load_item()
예제 #48
0
    def parse_download(self, response):
        """Yield a firmware item for each qualifying link of the download list.

        A link qualifies when its href contains "downloads" or "firmware"
        and does not end in ".html". Version and product are taken from
        ``response.meta``; the link text becomes the description.
        """
        for anchor in response.xpath("//div[@id='auto']//a"):
            target = anchor.xpath("./@href").extract()[0]
            label = anchor.xpath(".//text()").extract()[0]

            if "downloads" not in target and "firmware" not in target:
                continue
            if target.endswith(".html"):
                continue

            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("version", response.meta["version"])
            loader.add_value("url", target)
            loader.add_value("description", label)
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
예제 #49
0
    def parse_json(self, response):
        """Build one firmware item per entry of the JSON product list.

        Each entry's ``showName`` is matched against a list of name layouts
        (first match wins) to split it into product, version, date and
        build. Six-digit dates are expanded with a "20" century prefix.
        Entries matching no layout are still emitted, with only url,
        description and vendor set.

        The third layout now escapes the literal parentheses around the
        build number, so names such as
        "TL-IPC545K(P) V3.0_180227(1.0.14)标准版" are recognised; before, the
        parentheses were parsed as a regex group and such names never
        matched.
        """
        # (pattern, index of the date group, indexes of the build groups)
        name_layouts = (
            # model, version, non-digit separator, date, "_", dotted build
            (r'^(.+) (V[\d\.]+)([^\d]+)(\d+)_([\d\.]+)$', 4, (5,)),
            # TL-NVR5104 V1.0_171205.标准版
            (r'^(.+)[_ ]([vV][\d\.]+)([^\d]*)_([\d]+)([^\d]+)$', 4, (5,)),
            # TL-IPC545K(P) V3.0_180227(1.0.14)标准版
            (r'^(.+)[_ ](V[\d\.]+)_(\d+)\(([\d\.]+)\)([^\d]+)$', 3, (4, 5)),
        )

        resp = json.loads(response.text)
        self.logger.debug(resp)

        for product in resp:
            name = product['showName'].strip()
            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%Y%m%d"])

            # Model, Version, Date, Build
            self.logger.debug("Parsing '%s'", name)

            for pattern, date_group, build_groups in name_layouts:
                match = re.search(pattern, name)
                if match is None:
                    continue

                self.logger.debug(match.groups())
                item.add_value("product", match.group(1))
                item.add_value("version", match.group(2))

                date = match.group(date_group)
                if len(date) == 6:
                    # yymmdd -> yyyymmdd, to fit the "%Y%m%d" date format
                    date = "20" + date
                item.add_value("date", date)
                item.add_value(
                    "build", ' '.join(match.group(i) for i in build_groups))
                break
            else:
                self.logger.debug("No match for %s", name)

            url = ('http://service.tp-link.com.cn/download/' +
                   quote(product['fileName']))
            # Logged instead of printed to keep stdout clean.
            self.logger.debug(url)

            item.add_value("url", url)
            item.add_value("description", name)
            item.add_value("vendor", self.vendor)
            yield item.load_item()
예제 #50
0
    def parse(self, response):
        """Recurse through a directory listing and emit ``.gz`` firmware images.

        Directory links are followed; links ending in ".gz" whose href does
        not contain ".iso" become items. Product and version are derived
        from the "-"-separated file name: the detected version plus any
        BETA/RC tokens form the version, and the remaining tokens (minus
        RELEASE markers) form the product.

        Robustness fixes over the previous version:

        * anchors without href are skipped, and a missing link text falls
          back to the last path segment of the href, instead of raising
          IndexError;
        * BETA/RC suffixes no longer raise TypeError when no base version
          was found, and a trailing BETA token no longer raises IndexError;
        * extension stripping stops when ``os.path.splitext`` makes no
          progress (e.g. a name reduced to ".img"), avoiding an endless
          loop.
        """
        for link in response.xpath("//a"):
            href = link.xpath(".//@href").extract_first()
            if not href or ".." in href:
                continue

            if href.endswith('/'):
                yield Request(
                    url=urljoin(response.url, href),
                    headers={"Referer": response.url},
                    callback=self.parse)
                continue

            if not href.endswith(".gz") or ".iso" in href:
                continue

            text = (link.xpath(".//text()").extract_first()
                    or href.rsplit("/", 1)[-1])

            # strip off multiple file extensions
            stem = os.path.splitext(text)[0]
            while ".img" in stem or ".iso" in stem:
                shorter = os.path.splitext(stem)[0]
                if shorter == stem:
                    break
                stem = shorter

            basename = stem.split("-")
            version = FirmwareLoader.find_version_period(basename)

            # attempt to parse filename and generate product/version
            # strings
            remove = {version} if version else set()
            suffixes = []
            for i, token in enumerate(basename):
                if "BETA" in token:
                    # The token after BETA (its number) belongs to the
                    # version as well, when there is one.
                    follower = basename[i + 1] if i + 1 < len(basename) else ""
                    suffixes.append(token + follower)
                    remove.add(token)
                    if follower:
                        remove.add(follower)
                elif "RC" in token:
                    suffixes.append(token)
                    remove.add(token)
                elif "RELEASE" in token:
                    remove.add(token)

            if suffixes:
                version = "-".join(([version] if version else []) + suffixes)

            basename = [x for x in basename if x not in remove]

            item = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
            item.add_value("version", version)
            item.add_value("url", href)
            item.add_value("date", item.find_date(
                link.xpath("following::text()").extract()))
            item.add_value("product", "-".join(basename))
            item.add_value("vendor", self.name)
            yield item.load_item()
예제 #51
0
    def parse_product(self, response):
        """Yield a firmware item for every link in the upgrade-software tab.

        Only ``<li>`` tabs under ``ul#tab_conbox`` whose ``<h3>`` heading
        contains the text "升级软件" are considered. Each ``dd/a`` link in such
        a tab becomes one item; the product name comes from
        ``response.meta["product"]``.

        Args:
            response: product page response carrying ``meta["product"]``.

        Yields:
            Loaded ``FirmwareImage`` items.
        """
        for tab in response.xpath("//ul[@id='tab_conbox']/li"):
            heading = "".join(tab.xpath("./h3//text()").extract())
            if u"升级软件" not in heading:
                continue

            for anchor in tab.xpath(".//dd/a"):
                tokens = anchor.xpath(".//text()").extract()
                link = anchor.xpath("./@href").extract()[0]

                description = tokens[0]
                # When the link has a single text node, split it into words
                # and scan them right-to-left, because the hardware revision
                # can precede the firmware version,
                # e.g. "FH330升级软件(V1.0) V1.0.0.24_CN"
                if len(tokens) == 1:
                    tokens = tokens[0].split()[::-1]

                version = FirmwareLoader.find_version_period(tokens)

                item = FirmwareLoader(item=FirmwareImage(),
                                      response=response)
                item.add_value("version", version)
                item.add_value("url", link)
                item.add_value("product", response.meta["product"])
                item.add_value("description", description)
                item.add_value("vendor", self.vendor)
                yield item.load_item()
예제 #52
0
    def parse_kb(self, response):
        """Extract firmware items from a knowledge-base article.

        The HTML of the first ``div.sfdc_richtext`` element is split on the
        literal two-character sequence "=-"; each piece is re-parsed as its
        own HTML document so that the text surrounding a download link stays
        local to that piece. Links whose href contains "cache-www" become
        items.

        NOTE(review): the split is meant to separate regions delimited by
        rulers such as "======" or "------"; confirm that the literal "=-"
        actually occurs in the articles being scraped.

        Args:
            response: article response carrying ``meta["product"]``.

        Yields:
            Loaded ``FirmwareImage`` items.
        """
        article = response.xpath(
            "//div[@class='sfdc_richtext']").extract()[0]

        for chunk in article.split("=-"):
            fragment = chunk.strip() if chunk else chunk
            resp = HtmlResponse(url=response.url,
                                body=fragment,
                                encoding=response.encoding)

            for link in resp.xpath("//a"):
                href = link.xpath("@href").extract()[0]
                if "cache-www" not in href:
                    continue

                text = resp.xpath("//text()").extract()
                text_next = link.xpath("following::text()").extract()

                item = FirmwareLoader(
                    item=FirmwareImage(),
                    response=response,
                    date_fmt=["%b %d, %Y", "%B %d, %Y", "%m/%d/%Y"])

                # Prefer a version found after the link; fall back to the
                # whole fragment's text.
                version = (FirmwareLoader.find_version_period(text_next)
                           or FirmwareLoader.find_version_period(text))

                item.add_value("version", version)
                item.add_value("date", item.find_date(text))
                item.add_value("url", href)
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()
예제 #53
0
    def parse(self, response):
        """Crawl Polycom support pages and yield firmware items.

        Two page layouts are handled:

        * A page containing the ``UCagreement`` form: every double-quoted
          string inside the ``productAndDoc`` div that mentions
          "downloads.polycom.com" is emitted as an item, with all metadata
          taken from ``response.meta``.
        * A page with a ``ContentChannel`` div: each ``<li>`` with a link is
          skipped (licence agreements, release notes, Mac OS / Windows
          entries, guides, PDFs, samples), followed with this same callback
          (HTML targets), or emitted directly as an item.

        Args:
            response: page response; agreement pages must carry ``version``,
                ``date``, ``description`` and ``product`` in ``meta``.

        Yields:
            ``Request`` objects for intermediate pages and loaded items.
        """
        ignored_words = ("end user license agreement", "eula",
                         "release notes", "mac os", "windows", "guide",
                         "(pdf)", "sample")

        if response.xpath("//form[@name='UCagreement']"):
            container = response.xpath(
                "//div[@id='productAndDoc']").extract()[0]
            # Splitting the raw HTML on quotes isolates attribute values,
            # among them the download URLs.
            for href in container.split('"'):
                if "downloads.polycom.com" not in href:
                    continue

                item = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%B %d, %Y"])
                item.add_value("version", response.meta["version"])
                item.add_value("url", href.encode("utf-8"))
                item.add_value("date", response.meta["date"])
                item.add_value("description", response.meta["description"])
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()

        elif response.xpath("//div[@id='ContentChannel']"):
            for entry in response.xpath("//div[@id='ContentChannel']//li"):
                if not entry.xpath("./a"):
                    continue

                text = entry.xpath("./a//text()").extract()[0]
                href = entry.xpath("./a/@href").extract()[0].strip()
                date = entry.xpath("./span//text()").extract()

                path = urlparse.urlparse(href).path
                lowered = text.lower()

                # Documentation and desktop software are not firmware.
                if href.endswith(".pdf") or any(
                        word in lowered for word in ignored_words):
                    continue

                if path.endswith((".htm", ".html")) or "(html)" in lowered:
                    # Intermediate page: recurse, remembering what the link
                    # text said about the download.
                    target = urlparse.urljoin(
                        response.url, PolycomSpider.fix_url(href))
                    meta = {
                        "product": response.meta.get("product", text),
                        "date": date,
                        "version": FirmwareLoader.find_version_period([text]),
                        "description": text,
                    }
                    yield Request(
                        url=target,
                        meta=meta,
                        headers={"Referer": response.url},
                        callback=self.parse)
                    continue

                if path:
                    item = FirmwareLoader(
                        item=FirmwareImage(), response=response, date_fmt=["%B %d, %Y"])
                    item.add_value(
                        "version", FirmwareLoader.find_version_period([text]))
                    item.add_value("url", href.encode("utf-8"))
                    item.add_value("date", item.find_date(date))
                    item.add_value("description", text)
                    # The product field is deliberately not set on this path.
                    item.add_value("vendor", self.name)
                    yield item.load_item()
예제 #54
0
    def parse_product(self, response):
        """Yield a firmware item for each row of the first downloads table.

        Nothing is produced unless the page has an ``<h3 class="firm">``
        heading. For each table row the download link (column 2), version
        (column 5) and description (column 7) are read; the product is the
        part of the link before its first ``-``. The value in column 4 is
        not loaded into the item.

        Args:
            response: product page response.

        Yields:
            Loaded ``FirmwareImage`` items.
        """
        #<h3 class="firm">Firmware</h3>
        if not response.xpath('//h3[@class="firm"]').extract():
            return

        for tr in response.xpath('//*[@id="tab-downloads"]/table[1]/tbody/tr'):
            urls = tr.xpath("./td[2]/a/@href").extract()
            if not urls:
                # Rows without a download link carry no firmware; skip them
                # instead of raising IndexError and losing the other rows.
                continue
            url = urls[0]
            versions = tr.xpath("./td[5]/text()").extract()
            descriptions = tr.xpath("./td[7]/text()").extract()
            product = url.split('-')[0]

            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response)

            if versions:
                item.add_value("version", versions[0])
            if descriptions:
                item.add_value("description", descriptions[0])
            item.add_value("url", url)
            item.add_value("product", product)
            item.add_value("vendor", self.name)
            yield item.load_item()
예제 #55
0
    def parse_product(self, response):
        """Yield the firmware item advertised in a product page's Firmware tab.

        The page must contain an ``<a href="#Firmware">`` tab link and a
        download link in the first row of the ``content_Firmware`` table;
        otherwise nothing is produced. Product and version come from
        ``response.meta``.

        Args:
            response: product page response carrying ``meta["product"]`` and
                ``meta["version"]``.

        Yields:
            At most one loaded ``FirmwareImage`` item.
        """
        #<a href="#Firmware"><span>Firmware</span></a>
        if not response.xpath('//a[@href="#Firmware"]').extract():
            # Stop here: without the tab there is nothing to scrape, and
            # continuing would fail on the missing table cells.
            return

        urls = response.xpath(
            '//*[@id="content_Firmware"]/table/tbody/tr[1]/th/a/@href'
        ).extract()
        if not urls:
            return

        descriptions = response.xpath(
            '//div[@class="product-name"]//strong/text()').extract()
        dates = response.xpath(
            '//*[@id="content_Firmware"]/table/tbody/tr[2]/td[1]/span[2]/text()'
        ).extract()

        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%d/%m/%y"])

        item.add_value("url", urls[0])
        if dates:
            item.add_value("date", item.find_date(dates[0]))
        if descriptions:
            item.add_value("description", descriptions[0])
        item.add_value("product", response.meta["product"])
        item.add_value("version", response.meta["version"])
        item.add_value("vendor", self.vendor)
        yield item.load_item()
예제 #56
0
파일: belkin.py 프로젝트: MikimotoH/scraper
    def parse_kb(self, response):
        """Turn download links of a knowledge-base article into items.

        The markup of the first ``div.sfdc_richtext`` is cut at every
        occurrence of the literal string "=-" and each stripped piece is
        wrapped in its own ``HtmlResponse``. Within a piece, every link whose
        href contains "cache-www" yields one item; version and date are
        searched in the text of that piece.

        NOTE(review): the intent is to split on rulers such as "======" or
        "------"; verify that "=-" really appears in the scraped articles.

        Args:
            response: article response carrying ``meta["product"]``.

        Yields:
            Loaded ``FirmwareImage`` items.
        """
        date_formats = ["%b %d, %Y", "%B %d, %Y",
                        "%m/%d/%Y"]
        richtext = response.xpath(
            "//div[@class='sfdc_richtext']").extract()[0]
        segments = (piece and piece.strip() for piece in richtext.split("=-"))

        for segment in segments:
            section = HtmlResponse(url=response.url, body=segment,
                                   encoding=response.encoding)

            for anchor in section.xpath("//a"):
                target = anchor.xpath("@href").extract()[0]
                if "cache-www" in target:
                    section_text = section.xpath("//text()").extract()
                    trailing_text = anchor.xpath(
                        "following::text()").extract()

                    item = FirmwareLoader(item=FirmwareImage(),
                                          response=response,
                                          date_fmt=date_formats)

                    # Text after the link is tried first for the version;
                    # the whole section's text is the fallback.
                    found = FirmwareLoader.find_version_period(trailing_text)
                    if not found:
                        found = FirmwareLoader.find_version_period(
                            section_text)

                    item.add_value("version", found)
                    item.add_value("date", item.find_date(section_text))
                    item.add_value("url", target)
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.name)
                    yield item.load_item()
예제 #57
0
    def parse_product(self, response):
        """Yield the firmware item whose ``.bin`` path appears in the page.

        The page body is searched for the first path of the form
        ``/cn/Uploads/files/….bin``; that path is appended to the
        ``http://www.phicomm.com/`` prefix to form the download URL. All
        other fields are copied from ``response.meta``. Pages without such a
        path produce nothing.

        Args:
            response: product page response carrying ``product``, ``date``,
                ``version`` and ``description`` in ``meta``.

        Yields:
            At most one loaded ``FirmwareImage`` item.
        """
        import re
        #/cn/Uploads/files/20161024/K1_V22.4.2.15.bin
        match = re.search(r"(/cn/Uploads/files/.*?\.bin)", response.text)
        if match is None:
            # No firmware link on this page; nothing to emit.
            return
        url = "http://www.phicomm.com/{}".format(match.group(1))

        item = FirmwareLoader(
            item=FirmwareImage())
        item.add_value("url", url)
        item.add_value("product", response.meta['product'])
        item.add_value("date", response.meta['date'])
        item.add_value("version", response.meta['version'])
        item.add_value("vendor", self.vendor)
        item.add_value("description", response.meta['description'])

        yield item.load_item()
예제 #58
0
    def parse(self, response):
        """Recursively crawl a table-based directory listing.

        Each table row with a link in its second cell is examined. Links
        containing ``..`` are ignored. Directory links (ending in ``/``) are
        followed with this same callback: the first directory name seen
        becomes the product, the next one becomes the build (with the word
        "build" removed), and both travel in ``meta``. Links ending in a
        known firmware extension are emitted as items.

        Args:
            response: listing page response; ``meta`` may carry ``product``
                and ``build`` from parent directories.

        Yields:
            ``Request`` objects for sub-directories and loaded items for
            firmware files.
        """
        firmware_exts = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

        for link in response.xpath("//table//tr"):
            if not link.xpath("./td[2]/a"):
                continue

            texts = link.xpath("./td[2]/a/text()").extract()
            hrefs = link.xpath("./td[2]//@href").extract()
            # A link without text or target cannot be classified; skip it
            # rather than abort the whole listing with IndexError.
            if not texts or not hrefs:
                continue
            text, href = texts[0], hrefs[0]

            if ".." in href:
                continue
            elif href.endswith('/'):
                build = response.meta.get("build", None)
                product = response.meta.get("product", None)

                if not product:
                    product = text
                elif not build:
                    build = text.replace("build", "")

                yield Request(url=urlparse.urljoin(response.url, href),
                              headers={"Referer": response.url},
                              meta={
                                  "build": build,
                                  "product": product
                              },
                              callback=self.parse)
            elif href.endswith(firmware_exts):
                item = FirmwareLoader(item=FirmwareImage(),
                                      response=response,
                                      date_fmt=["%Y-%m-%d"])
                # Files found before any directory was descended have no
                # build/product in meta; .get() avoids a KeyError there.
                item.add_value("build", response.meta.get("build"))
                item.add_value("url", href)
                item.add_value(
                    "version",
                    FirmwareLoader.find_version_period(
                        os.path.splitext(text)[0].split("-")))
                item.add_value(
                    "date",
                    item.find_date(link.xpath("./td[3]/text()").extract()))
                item.add_value("product", response.meta.get("product"))
                item.add_value("vendor", self.name)
                yield item.load_item()
예제 #59
0
파일: se.py 프로젝트: harry1080/IoT-vulhub
    def parse_product_sw_fw(self, response):
        """Yield firmware items from the "Firmware" section of a downloads page.

        Each ``docs-table__section`` is indexed by the text of its column
        name headers. If a header reading "Firmware" exists, every row of
        that section is passed to ``self.extract_fw_info`` and rows that
        yield a download link become items.

        Args:
            response: downloads page response carrying ``meta['product']``.

        Yields:
            Loaded ``FirmwareImage`` items.
        """
        product = response.meta['product']

        col_selector_map = {}
        # Find the "Firmware" section.  NOTE: whitespace in the class is intentional
        for section in response.css('div.docs-table__section '):
            for col in section.css('div.docs-table__column-name'):
                # A header without any text node gives None; treat it as
                # empty instead of failing on None.strip().
                col_text = (col.xpath('.//text()').extract_first() or '').strip()
                if len(col_text) > 1:
                    col_selector_map[col_text] = section

        fw_sect = col_selector_map.get(u'Firmware')
        if fw_sect is None:
            logging.debug("Did not find a 'Firmware' section in the downloads for %s", product)
            return

        # Iterate Firmware rows
        for fw_row in fw_sect.css('div.docs-table__row'):
            fw_version, fw_href, fw_date, fw_desc = self.extract_fw_info(fw_row, response)
            if fw_href is None:
                continue

            item = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%m/%d/%y"])
            item.add_value('product', product)
            item.add_value('vendor', self.name)
            item.add_value('url', fw_href)
            item.add_value('description', fw_desc)
            item.add_value('date', fw_date)
            # The version returned by extract_fw_info is recorded with the
            # other fields rather than discarded.
            item.add_value('version', fw_version)
            yield item.load_item()