コード例 #1
0
    def parse_kb(self, response):
        """Yield one firmware item per firmware link on a support article.

        The paragraphs under ``support-article-downloads`` are split on
        ``<br><br>`` and every piece is parsed as its own HTML fragment,
        because different firmware versions are not clearly separated in the
        markup.  Paragraphs and pieces are both walked in reverse so that a
        MIB link is seen before the firmware links it should be attached to.

        :param response: article page; ``response.meta["product"]`` is read.
        """
        mib = None

        paragraphs = response.xpath(
            "//div[@id='support-article-downloads']/div/p")
        for entry in reversed(paragraphs):
            for segment in reversed(entry.extract().split("<br><br>")):
                resp = HtmlResponse(url=response.url,
                                    body=segment,
                                    encoding=response.encoding)
                # The text of a segment is identical for every link in it, so
                # extract it once instead of once (or twice) per link.
                text = resp.xpath("//text()").extract()

                for href in resp.xpath("//a/@href").extract():
                    if "MIBs" in href:
                        # remembered for the firmware links that follow
                        mib = href

                    elif "firmware" in href:
                        item = FirmwareLoader(item=FirmwareImage(),
                                              response=resp,
                                              date_fmt=["%m/%d/%Y"])
                        item.add_value("date", item.find_date(text))
                        # NOTE(review): this collects every link of the
                        # segment, not only ``href`` -- confirm intended.
                        item.add_xpath("url", "//a/@href")
                        item.add_value("mib", mib)
                        item.add_value("product", response.meta["product"])
                        item.add_value("vendor", self.name)
                        item.add_value(
                            "version",
                            FirmwareLoader.find_version_period(text))
                        yield item.load_item()
コード例 #2
0
ファイル: openwrt.py プロジェクト: Anderson-Liu/scraper
    def parse_url(self, response):
        """Walk a directory listing: recurse into folders, yield image files.

        Links containing ``..`` are ignored.  Links ending in ``/`` are
        followed (unless their label contains ``package/``) with the folder
        name appended to the product carried in ``response.meta``.  Links
        ending in a known image suffix produce a firmware item.

        :param response: listing page; ``response.meta["version"]`` is read,
            and ``response.meta["product"]`` is read for image links.
        """
        image_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

        for link in response.xpath("//a"):
            hrefs = link.xpath("@href").extract()
            if not hrefs:
                # anchor without a target: nothing to follow or download
                continue
            href = hrefs[0]

            # An anchor may have no direct text node (e.g. it wraps an
            # image); indexing [0] blindly used to abort the whole listing.
            labels = link.xpath("text()").extract()
            text = labels[0] if labels else ""

            if ".." in href:
                continue
            elif href.endswith('/'):
                if text and "package/" not in text:
                    # strip the trailing "/" from the folder label
                    folder = text[0:-1]
                    if "product" in response.meta:
                        product = "%s-%s" % (response.meta["product"], folder)
                    else:
                        product = folder

                    yield Request(url=urljoin(response.url, href),
                                  headers={"Referer": response.url},
                                  meta={
                                      "version": response.meta["version"],
                                      "product": product
                                  },
                                  callback=self.parse_url)
            elif href.endswith(image_suffixes):
                item = FirmwareLoader(item=FirmwareImage(),
                                      response=response,
                                      date_fmt=["%d-%b-%Y"])
                item.add_value("version", response.meta["version"])
                item.add_value("url", href)
                # the date is searched for in the text following the link
                item.add_value(
                    "date",
                    item.find_date(link.xpath("following::text()").extract()))
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()
コード例 #3
0
    def parse_product(self, response):
        """Yield a firmware item for every download link in ``#mainbox``.

        The link title is split on spaces: the last word is taken as the
        version and the first word as the product.  When the first word
        matches the model pattern, the product is narrowed to its leading
        run of letters/digits other than ``v``/``V``, which drops a trailing
        hardware revision.

        Sample titles::

            FH456V1.0 Firmware V10.1.1.1_EN
            E101(V2.0) Firmware V1.10.0.1_EN
            G3(V2.0) Firmware V2.0.0.1_EN
            O3 Firmware V1.0.0.3_EN
            i6 Firmware V1.0.0.9(3857)_EN
        """
        import re

        # Same pattern text as the former ``ur'...'`` literal, spelled so
        # that it is also valid Python 3 (U+FF08 is a full-width "(").
        pattern = re.compile(
            u'^(?P<product>([a-uw-zA-UW-Z0-9])+)[\\(\uff08]?(V\\d\\.0)?')

        for a in response.xpath("//div[@id='mainbox']//dd/a"):
            url = a.xpath("./@href").extract()[0]
            title = a.xpath("./text()").extract()[0]

            items = title.split(' ')
            product = items[0]
            version = items[-1]

            # Only byte strings need decoding.  Calling .decode() on text
            # that is already unicode raised, and the old fallback then
            # read the undefined name ``item`` instead of ``items``.
            model = items[0]
            if isinstance(model, bytes):
                try:
                    model = model.decode('utf-8')
                except UnicodeDecodeError:
                    model = None  # keep the raw first word as product

            ret = pattern.search(model) if model is not None else None
            if ret:
                product = ret.group('product')

            item = FirmwareLoader(item=FirmwareImage(), response=response)
            item.add_value("version", version)
            item.add_value("url", url)
            item.add_value("product", product)
            item.add_value("vendor", self.vendor)
            yield item.load_item()
コード例 #4
0
ファイル: qnap.py プロジェクト: lxonz/IoT-vulhub
    def parse_model_files(self, response):
        """Yield a firmware item for each firmware entry of a model's JSON.

        The body is expected to hold ``downloads -> firmware``, a mapping
        whose values describe one firmware release each.  If either key is
        missing, the model is logged as having no firmware and nothing is
        yielded.

        :param response: JSON response; ``response.meta["name"]`` is read.
        """
        # ``urllib.urljoin`` does not exist in the standard library; import
        # the real function locally so this works on Python 2 and 3.
        try:
            from urllib.parse import urljoin
        except ImportError:  # Python 2
            from urlparse import urljoin

        meta = response.meta

        # The raw body is parsed rather than the decoded text because of
        # Python 2 unicode issues originating from the 'remarks' fields.
        try:
            model_files = json.loads(response.body)['downloads']['firmware']
        except KeyError:
            logging.info("No downloadable firmware for %s", meta)
            return

        for fw_info in model_files.values():
            # options: {'global', 'europe', 'usa'}
            href = fw_info['links']['global']
            if not href.startswith(("https://", "http://")):
                # scheme-less link: resolve it against https
                href = urljoin("https://", href)

            # date_fmt is a list of formats, as in every other loader call
            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%Y-%m-%d"])
            item.add_value('product', meta['name'])
            item.add_value('vendor', self.name)
            item.add_value('description', fw_info['releasenote'])
            item.add_value('date', fw_info['published_at'])
            item.add_value('version', fw_info['version'])
            item.add_value('url', href)
            yield item.load_item()
コード例 #5
0
    def parse_product(self, response):
        """Yield the firmware item described on a product page.

        Pages without a ``#Firmware`` tab link offer no firmware and yield
        nothing.  Otherwise the URL and date come from the first two rows
        of the firmware table and the description from the product name.

        :param response: product page; ``response.meta["product"]`` and
            ``response.meta["version"]`` are read.
        """
        # <a href="#Firmware"><span>Firmware</span></a>
        if not response.xpath("//a[@href=\"#Firmware\"]").extract():
            # ``yield None`` did not stop execution here, so the lookups
            # below ran anyway and failed on the missing firmware table.
            return

        description = response.xpath(
            "//div[@class=\"product-name\"]//strong/text()").extract()[0]
        url = response.xpath(
            "//*[@id=\"content_Firmware\"]/table/tbody/tr[1]/th/a/@href"
        ).extract()[0]
        date = response.xpath(
            "//*[@id=\"content_Firmware\"]/table/tbody/tr[2]/td[1]/span[2]/text()"
        ).extract()[0]

        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%d/%m/%y"])

        item.add_value("url", url)
        item.add_value("date", item.find_date(date))
        item.add_value("description", description)
        item.add_value("product", response.meta["product"])
        item.add_value("version", response.meta["version"])
        item.add_value("vendor", self.vendor)
        yield item.load_item()
コード例 #6
0
    def parse_product(self, response):
        """Yield a firmware item for each firmware download block.

        Download blocks are grouped by type id (firmware = 20,
        gpl source = 30, bios = 3).  For every firmware block with a
        detectable version, the source-code blocks are searched for one
        mentioning that version; its link is stored as ``sdk``.

        :param response: product page; ``response.meta["product"]`` is read.
        """
        firmware_blocks = response.xpath(
            "//div[@id='div_type_20']/div[@id='download-os-answer-table']")

        for block in firmware_blocks:
            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response, date_fmt=["%Y/%m/%d"])

            heading = block.xpath("./p//text()").extract()
            version = FirmwareLoader.find_version_period(heading)

            # first download link only (e.g. DLM instead of global or p2p)
            href = block.xpath("./table//tr[3]//a/@href").extract()[0]

            # look for a source-code block mentioning the same version;
            # the last matching block wins
            sdk_href = None
            if version:
                source_blocks = response.xpath(
                    "//div[@id='div_type_30']/div[@id='download-os-answer-table']")
                for source_block in source_blocks:
                    source_heading = "".join(
                        source_block.xpath("./p//text()").extract())
                    if version in source_heading:
                        sdk_href = source_block.xpath(
                            "./table//tr[3]//a/@href").extract()[0]

            date_text = block.xpath("./table//tr[2]/td[1]//text()").extract()
            description = " ".join(
                block.xpath("./table//tr[1]//td[1]//text()").extract())

            loader.add_value("version", version)
            loader.add_value("date", loader.find_date(date_text))
            loader.add_value("description", description)
            loader.add_value("url", href)
            loader.add_value("sdk", sdk_href)
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
コード例 #7
0
    def parse_product(self, response):
        """Yield the firmware item described by the first download table.

        Row 1 holds the file name, row 4 the date and row 5 the link.  A
        name containing ``_`` is split into product and build; otherwise a
        name containing a space contributes only the product.
        """
        table = "//div[@class='download']/table[1]"

        # Row 1 is read once and encoded two ways: ASCII-only for parsing,
        # UTF-8 for the description.
        name_cell = response.xpath(
            "//div[@class='download']/table[1]//tr[1]/td[2]//text()").extract(
            )[0]
        text = name_cell.encode("ascii", errors="ignore")
        desc = name_cell.encode("utf-8")

        date = response.xpath(table + "//tr[4]/td[2]//text()").extract()
        href = response.xpath(table + "//tr[5]/td[2]/a/@href").extract()[0]

        product = None
        build = None
        if "_" in text:
            pieces = text.split("_")
            product, build = pieces[0], pieces[1]
        elif " " in text:
            product = text.split(" ")[0]

        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%Y/%m/%d"])
        loader.add_value("url", href.encode("utf-8"))
        loader.add_value("date", loader.find_date(date))
        loader.add_value("description", desc)
        loader.add_value("build", build)
        loader.add_value("product", product)
        loader.add_value("vendor", self.vendor)
        yield loader.load_item()
コード例 #8
0
ファイル: 360.py プロジェクト: Anderson-Liu/scraper
    def parse_product(self, response):
        """Yield a firmware item per record of a ``var commonInfo`` script.

        The response text is scanned with one regular expression per field
        (id, title, romVersions, romUrl, updateDate) and the n-th match of
        every field is combined into the n-th item.  Responses that do not
        start with ``var commonInfo`` yield nothing.
        """
        import re

        js = response.text
        if not js.startswith("var commonInfo"):
            return

        print(response.url)
        print(js)

        products = re.findall(u"id:\"(?P<product>.*?)\"", js)
        descriptions = re.findall(u"title:\"(?P<description>.*?)\"", js)
        versions = re.findall(u"romVersions:\"(?P<version>.*?)\"", js)
        urls = re.findall(u"romUrl:\"(?P<url>.*?)\"", js)
        dates = re.findall(u"updateDate:\"(?P<date>.*?)\"", js)

        # zip() stops at the shortest list, so a record missing one field
        # no longer aborts the callback with an IndexError.
        records = zip(products, urls, versions, descriptions, dates)
        for product, url, version, description, date in records:
            item = FirmwareLoader(item=FirmwareImage(), response=response)
            item.add_value("url", url)
            item.add_value("version", version)
            item.add_value("product", product)
            item.add_value("description", description)
            item.add_value("date", date)
            item.add_value("vendor", self.name)
            yield item.load_item()
コード例 #9
0
    def parse_download(self, response):
        """Yield a firmware item for every download table mentioning firmware.

        The first ``maindescription`` entry supplies the version text and
        the second the date.  The URL is the second quote-delimited piece
        of the link's ``onclick`` handler plus the confirmation parameters.

        :param response: download page; ``response.meta["product"]`` is read.
        """
        confirm_suffix = "&button=Continue+with+Download&Continue=yes"

        for table in response.xpath("//div[@class='downloadtable']"):
            table_text = " ".join(table.xpath(".//text()").extract())
            if "firmware" not in table_text.lower():
                continue

            version_text = table.xpath(
                ".//li[@class='maindescription' and position() = 1]//text()"
            ).extract()
            date_text = table.xpath(
                ".//li[@class='maindescription' and position() = 2]//text()"
            ).extract()
            onclick = table.xpath(
                ".//li[@class='maindescription']//a/@onclick"
            ).extract()[0]
            href = onclick.split('\'')[1] + confirm_suffix

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%m/%d/%Y"])
            loader.add_value("url", href)
            loader.add_value("product", response.meta["product"])
            loader.add_value("date", loader.find_date(date_text))
            loader.add_value("version",
                             FirmwareLoader.find_version(version_text))
            loader.add_value("vendor", self.name)
            yield loader.load_item()
コード例 #10
0
    def parse_download(self, response):
        """Yield a firmware item for each record in category bucket 7.

        The product name is the page header minus " Support & Drivers".
        Date and version are both searched for in the record's
        ``dateVersion`` text, and the link is rewritten from
        ``file-download`` to ``file-redirect``.
        """
        stamp_xpath = ".//ul[@class='dateVersion']//strong/text()"

        records = response.xpath(
            "//li[@class='categoryBucket categoryBucketId-7']//li[@class='record ']"
        )
        for record in records:
            header = response.xpath(
                "//div[@class='prodNavHeaderBody']//text()").extract()[0]
            product = header.replace(" Support & Drivers", "")

            # the same nodes hold both the date and the version
            date = record.xpath(stamp_xpath).extract()
            version = record.xpath(stamp_xpath).extract()

            link = record.xpath(".//a/@href").extract()[0]
            href = link.replace("file-download", "file-redirect")
            text = record.xpath(".//a//text()").extract()[0]

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%b %d, %Y"])
            loader.add_value("url", href)
            loader.add_value("product", product)
            loader.add_value("date", loader.find_date(date))
            loader.add_value("description", text)
            loader.add_value("version", loader.find_version_period(version))
            loader.add_value("vendor", self.name)
            yield loader.load_item()
コード例 #11
0
ファイル: belkin.py プロジェクト: harry1080/IoT-vulhub
    def parse_kb(self, response):
        """Yield a firmware item for every ``cache-www`` link of an article.

        The rich-text body is first cut on ``"=-"`` to find regions
        segmented by e.g. "======" or "------"; each region is parsed as
        its own HTML fragment.  The version is searched for in the text
        following the link first, then in the whole region.

        :param response: article page; ``response.meta["product"]`` is read.
        """
        filtered = response.xpath(
            "//div[@class='sfdc_richtext']").extract()[0].split("=-")

        for entry in [x and x.strip() for x in filtered]:
            resp = HtmlResponse(url=response.url,
                                body=entry,
                                encoding=response.encoding)
            # the region text is the same for every link in it
            text = resp.xpath("//text()").extract()

            for link in resp.xpath("//a"):
                hrefs = link.xpath("@href").extract()
                if not hrefs:
                    # Anchors without href (e.g. named anchors) used to
                    # raise IndexError and abort the whole article.
                    continue
                href = hrefs[0]

                if "cache-www" in href:
                    text_next = link.xpath("following::text()").extract()

                    item = FirmwareLoader(
                        item=FirmwareImage(),
                        response=response,
                        date_fmt=["%b %d, %Y", "%B %d, %Y", "%m/%d/%Y"])

                    version = FirmwareLoader.find_version_period(text_next)
                    if not version:
                        version = FirmwareLoader.find_version_period(text)

                    item.add_value("version", version)
                    item.add_value("date", item.find_date(text))
                    item.add_value("url", href)
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.name)
                    yield item.load_item()
コード例 #12
0
ファイル: tp-link_zh-cn.py プロジェクト: cpbscholten/scraper
    def parse_json(self, response):
        """Yield a firmware item for every entry of the JSON download list.

        Each entry's ``showName`` is matched against the known name
        layouts, in order, to recover product, version, date and build.
        Six-digit dates are expanded with a leading "20".  Entries that
        match no layout are still yielded with only url, description and
        vendor set.
        """
        # (pattern, date group, build groups), tried in order.
        layouts = (
            # Model, Version, Date, Build
            (r'^(.+) (V[\d\.]+)([^\d]+)(\d+)_([\d\.]+)$', 4, (5,)),
            # e.g. "TL-NVR5104 V1.0_171205.<edition suffix>"
            (r'^(.+)[_ ]([vV][\d\.]+)([^\d]*)_([\d]+)([^\d]+)$', 4, (5,)),
            # e.g. "TL-IPC545K(P) V3.0_180227(1.0.14)<edition suffix>".
            # The parentheses around the build number are literal and must
            # be escaped; unescaped they only nested two groups and the
            # sample name above never matched.
            (r'^(.+)[_ ](V[\d\.]+)_(\d+)\(([\d\.]+)\)([^\d]+)$', 3, (4, 5)),
        )

        resp = json.loads(response.text)
        self.logger.debug(resp)
        for product in resp:
            name = product['showName'].strip()
            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%Y%m%d"])

            self.logger.debug("Parsing '%s'" % name)
            for pattern, date_group, build_groups in layouts:
                match = re.search(pattern, name)
                if not match:
                    continue

                self.logger.debug(match.groups())
                item.add_value("product", match.group(1))
                item.add_value("version", match.group(2))
                date = match.group(date_group)
                if len(date) == 6:
                    # YYMMDD -> YYYYMMDD
                    date = "20" + date
                item.add_value("date", date)
                item.add_value(
                    "build",
                    ' '.join(match.group(g) for g in build_groups))
                break
            else:
                self.logger.debug("No match for %s" % name)

            url = ('http://service.tp-link.com.cn/download/' +
                   quote(product['fileName']))
            self.logger.debug(url)
            item.add_value("url", url)
            item.add_value("description", name)
            item.add_value("vendor", self.vendor)
            yield item.load_item()
コード例 #13
0
ファイル: tenda_zh.py プロジェクト: harry1080/IoT-vulhub
    def parse_product(self, response):
        """Yield the firmware item linked from the page's thumbnail block.

        :param response: product page; ``response.meta['version']`` and
            ``response.meta['product']`` are read.
        """
        meta = response.meta
        links = response.xpath("//div[@class='thumbnail']//a/@href").extract()
        download_url = links[0]

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        loader.add_value("version", meta['version'])
        loader.add_value("url", download_url)
        loader.add_value("product", meta['product'])
        loader.add_value("vendor", self.vendor)
        yield loader.load_item()
コード例 #14
0
 def download_item(self, response):
     """Yield the firmware item behind the page's download button.

     The button link is prefixed with ``https:``.  Pages without a
     download button yield nothing.

     :param response: download page; ``response.meta`` must carry
         ``version``, ``date`` and ``product``.
     """
     href = response.xpath(
         "//div[@class='downbtns']/a/@href").extract_first()
     if href is None:
         # str(None) used to produce the bogus URL "https:None"
         return

     item = FirmwareLoader(item=FirmwareImage(),
                           response=response,
                           date_fmt=["%Y-%m-%d"])
     item.add_value("url", "https:" + href)
     item.add_value("version", response.meta["version"])
     item.add_value("date", response.meta["date"])
     item.add_value("product", response.meta["product"])
     item.add_value("vendor", self.name)
     yield item.load_item()
コード例 #15
0
    def parse(self, response):
        """Handle both product overview pages and per-product download pages.

        With ``product`` in ``response.meta`` the page is a download table:
        every row after the header with a version and a link that passes
        the keyword/suffix filters yields a firmware item.  Otherwise, on a
        page with a ``product-listing`` block, each HTML entry is followed
        with its link text as the product.
        """
        if "product" in response.meta:
            # keywords marking files that are not firmware
            to_remove_list = (
                "end user license agreement", "eula", "release notes",
                "mac os", "windows", "guide", "(pdf)", "sample", "client",
                "manager", "software", "virtual", "control_panel",
                "activexbypass"
            )
            skipped_suffixes = ("htm", "html", "pdf", "ova", ".plcm.vc")

            for entry in response.xpath("//div[@class='tab-content']//tr")[1:]:

                version = entry.xpath("./td[1]//a//text()").extract_first()
                url = entry.xpath("./td[2]//a/@href").extract_first()
                if version is None or url is None:
                    continue

                # remove unnecessary files
                url_lower = url.lower()
                version_lower = version.lower()
                if any(x in url_lower or x in version_lower
                       for x in to_remove_list) \
                        or url.endswith(skipped_suffixes):
                    continue

                # A stray trailing comma used to turn this into a 1-tuple.
                url = urllib.parse.urljoin(response.url,
                                           PolycomSpider.fix_url(url))

                item = FirmwareLoader(item=FirmwareImage(), response=response)
                item.add_value("version", version)
                item.add_value("url", url)
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()

        # all entries on the product overview pages
        elif response.xpath("//div[@class='product-listing']"):
            for entry in response.xpath("//div[@class='product-listing']//li"):
                text = entry.xpath("./a//text()").extract_first()
                href = entry.xpath("./a/@href").extract_first()
                # Covers entries without a link as well as links lacking
                # text or href, which used to raise AttributeError.
                if text is None or href is None:
                    continue
                href = href.strip()
                text_lower = text.lower()

                if any(x in text_lower for x in
                       ("advisories", "support", "notices", "features")) \
                        or href.endswith(".pdf"):
                    continue

                path = urllib.parse.urlparse(href).path
                if path.endswith((".htm", ".html")) or "(html)" in text_lower:
                    yield Request(url=urllib.parse.urljoin(
                        response.url, PolycomSpider.fix_url(href)),
                                  meta={"product": text},
                                  headers={"Referer": response.url},
                                  callback=self.parse)
コード例 #16
0
ファイル: netcore.py プロジェクト: lxonz/IoT-vulhub
    def parse_product(self, response):
        """Yield the firmware item behind the page's ``downLoadHref`` link.

        The link is appended to ``self.firmware_url``; date, description
        and product are taken from ``response.meta``.
        """
        url = self.firmware_url + response.xpath(
            '//a[@id="downLoadHref"]/@href').extract()[0]

        item = FirmwareLoader(item=FirmwareImage(), response=response)
        # The date in meta is a value, not an XPath expression, so it is
        # added with add_value like the other meta fields.
        item.add_value("date", response.meta['date'])
        item.add_value("description", response.meta['description'])
        item.add_value("url", url)
        item.add_value("product", response.meta["product"])
        item.add_value("vendor", self.name)
        yield item.load_item()
コード例 #17
0
    def parse(self, response):
        """Yield a firmware item for each link in the table's second column.

        The link's ``title`` attribute is used as the description and is
        passed to ``self.parse_product`` to derive the product.
        """
        for anchor in response.xpath("//table//tr//td[2]//a"):
            title = anchor.xpath('./@title').extract()[0]
            href = anchor.xpath('./@href').extract()[0]

            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", href)
            loader.add_value("product", self.parse_product(title))
            loader.add_value("description", title)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
コード例 #18
0
    def parse(self, response):
        """Yield a firmware item for every ``.img`` link on the page.

        The version is the part of the file name after its last ``-``,
        cut at the last ``.img``.  Product and vendor are both
        ``self.name``.
        """
        for href in response.xpath("//a/@href").extract():
            if not href.endswith(".img"):
                continue

            # last dash-separated piece of the file name
            tail = href.split("/")[-1].split("-")[-1]
            version = tail[0: tail.rfind(".img")]

            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", href)
            loader.add_value("product", self.name)
            loader.add_value("vendor", self.name)
            loader.add_value("version", version)
            yield loader.load_item()
コード例 #19
0
 def parse(self, response):
     """Crawl a file listing: yield ``.bin``/``.upg`` files, follow folders.

     The links ``..`` and ``/`` are skipped.  Any other link containing
     ``/`` is requested and parsed by this same callback.
     """
     for href in response.xpath("//a/@href").extract():
         if href in ("..", "/"):
             continue

         if href.endswith((".bin", ".upg")):
             loader = FirmwareLoader(item=FirmwareImage(), response=response)
             loader.add_value("url", href)
             loader.add_value("vendor", self.name)
             yield loader.load_item()
         elif "/" in href:
             target = urllib.parse.urljoin(response.url, href)
             yield Request(url=target,
                           headers={"Referer": response.url},
                           callback=self.parse)
コード例 #20
0
ファイル: foscam.py プロジェクト: zyw-200/IOTFuzzer_Full
    def parse(self, response):
        """Yield firmware items from the two paragraph lists in ``main_right``.

        Each ``span`` holds fixed-size groups of ``<p>`` elements: groups
        of 7 in the first span and groups of 5 in the second.  Within a
        group, paragraph 2 lists the products (separated by CRLF in the
        first span and by commas in the second), paragraph 3 holds the
        version and the last paragraph holds the download link.  One item
        is yielded per listed product.
        """
        # (span index, paragraphs per group, product separator)
        sections = ((1, 7, "\r\n"), (2, 5, ","))

        for span, stride, separator in sections:
            base = "//div[@id='main_right']/span[%d]" % span
            total = len(response.xpath(base + "/p"))

            for i in range(0, total, stride):
                prods = response.xpath(
                    "%s//p[%d]/text()" % (base, i + 2)
                ).extract()[0].split(separator)

                for product in prods:
                    item = FirmwareLoader(item=FirmwareImage(),
                                          response=response)
                    item.add_xpath(
                        "version", "%s//p[%d]/text()" % (base, i + 3))
                    # the link sits in the last paragraph of the group
                    item.add_xpath(
                        "url", "%s//p[%d]/a/@href" % (base, i + stride))
                    item.add_value("product", product)
                    item.add_value("vendor", self.name)
                    yield item.load_item()
コード例 #21
0
    def parse_download(self, response):
        """Yield a firmware item for each download link inside ``#auto``.

        A link qualifies when its href contains "downloads" or "firmware"
        and does not end in ``.html``.

        :param response: download page; ``response.meta["version"]`` and
            ``response.meta["product"]`` are read.
        """
        for anchor in response.xpath("//div[@id='auto']//a"):
            href = anchor.xpath("./@href").extract()[0]
            label = anchor.xpath(".//text()").extract()[0]

            if href.endswith(".html"):
                continue
            if "downloads" not in href and "firmware" not in href:
                continue

            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("version", response.meta["version"])
            loader.add_value("url", href)
            loader.add_value("description", label)
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
コード例 #22
0
 def parse_link(self, response):
     """Yield a firmware item for each row of the download-items table.

     Version comes from the ``column-version`` cell; URL and description
     come from the link in the row's header cell.

     :param response: page; ``response.meta["product"]`` is read.
     """
     # Some items require captcha authentication and pass a cookie, e.g.
     # DownloadAuthorizationToken = 7CB8169BFC8848B0... (long hex token).
     for row in response.xpath("//tbody[@class='etdownloaditems']//tr"):
         version_cell = row.xpath(
             ".//td[@class='column-version']//text()").extract()[0]
         href = row.xpath(".//th/a/@href").extract()[0]
         label = row.xpath(".//th/a//text()").extract()[0]

         loader = FirmwareLoader(item=FirmwareImage(), response=response)
         loader.add_value("version", version_cell.strip())
         loader.add_value("url", href)
         loader.add_value("description", label)
         loader.add_value("product", response.meta["product"])
         loader.add_value("vendor", self.name)
         yield loader.load_item()
コード例 #23
0
ファイル: verizon.py プロジェクト: spencerwuwu/scraper
    def parse(self, response):
        """Follow the router selector, or harvest firmware links from a page.

        On a page with the ``router`` drop-down, every option except the
        empty one and "allrouters" is requested as ``?router=<value>``.
        On any other page the links inside ``search_main_content`` (or,
        failing that, ``ghfbodycontent``) are scanned, and those pointing
        at ``download.verizon.net`` with "firmware" in the URL yield an
        item.
        """
        if response.xpath("//select[@id='router']"):
            for product in response.xpath(
                    "//select[@id='router']/option/@value").extract():
                if product and product != "allrouters":
                    yield Request(
                        url=urllib.parse.urljoin(
                            response.url, "?router=%s" % (product)),
                        headers={"Referer": response.url},
                        callback=self.parse)
            return

        # Both result layouts are handled identically; only the container
        # holding the links differs.
        if response.xpath("//td[@id='search_main_content']"):
            links = response.xpath("//td[@id='search_main_content']//a")
        else:
            links = response.xpath("//div[@id='ghfbodycontent']//a")

        for link in links:
            hrefs = link.xpath("./@href").extract()
            if not hrefs:
                continue
            href = hrefs[0]

            if "download.verizon.net" in href and "firmware" in href:
                text = link.xpath(".//text()").extract()
                item = FirmwareLoader(
                    item=FirmwareImage(), response=response)
                item.add_value("url", href)
                # a link without text no longer raises IndexError; the
                # item is simply yielded without a description
                item.add_value("description", text[0] if text else None)
                item.add_value("vendor", self.name)
                yield item.load_item()
コード例 #24
0
    def parse_download(self, response):
        """Yield a firmware item for each firmware entry of a JSON file list.

        Only entries whose ``subFileType`` is "firmware" are used.  The
        ``releaseDate`` is a millisecond timestamp and is rendered with
        the loader's first configured date format.

        :param response: JSON response; ``response.meta["product"]`` is read.
        """
        entries = json.loads(response.body_as_unicode())

        for entry in entries:
            if entry["subFileType"] != "firmware":
                continue

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%d/%m/%y"])
            loader.add_value("version", entry["fileVersion"])

            # milliseconds -> seconds
            released = datetime.datetime.fromtimestamp(
                int(entry["releaseDate"]) / 1000)
            date_format = loader.context.get("date_fmt")[0]
            loader.add_value("date", released.strftime(date_format))

            loader.add_value("description", entry["fileName"])
            loader.add_value("url", entry["downloadUrl"])
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.vendor)
            yield loader.load_item()
コード例 #25
0
ファイル: actiontec.py プロジェクト: Anderson-Liu/scraper
 def parse_product(self, response):
     """Yield a firmware item for each firmware row of ``accordion-2``.

     The header row is skipped.  A row counts as firmware when the text
     of the first link in its second cell contains "firmware"
     (case-insensitive).  Date and URL are read relative to the row.

     :param response: product page; ``response.meta["product"]`` is read.
     """
     rows = response.xpath("//div[@id='accordion-2']//tr[position() > 1]")
     for row in rows:
         label = row.xpath("./td[2]//a[1]/text()").extract()
         if "firmware" not in "".join(label).lower():
             continue

         # selector=row makes the relative xpaths below apply to this row
         loader = FirmwareLoader(item=FirmwareImage(), response=response,
                                 selector=row, date_fmt=["%Y-%m-%d"])
         loader.add_xpath("date", "td[1]//text()")
         loader.add_value("description", label)
         loader.add_xpath("url", "td[2]//a[1]/@href")
         loader.add_value("product", response.meta["product"])
         loader.add_value("vendor", self.name)
         loader.add_value(
             "version", FirmwareLoader.find_version_period(label))
         yield loader.load_item()
コード例 #26
0
    def parse(self, response):
        """Yield a firmware item for every "Firmware" link in the main container.

        Each paragraph or list in ``main-container`` is examined on its
        own; the product is looked up in that element's text.
        """
        blocks = response.xpath(
            "//div[@class='main-container']//p|//div[@class='main-container']//ul"
        )
        for block in blocks:
            block_text = block.xpath(".//text()").extract()
            firmware_links = [
                href for href in block.xpath(".//a/@href").extract()
                if "Firmware" in href
            ]

            for href in firmware_links:
                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response)
                loader.add_value("url", href)
                loader.add_value("product",
                                 FirmwareLoader.find_product(block_text))
                loader.add_value("vendor", self.name)
                yield loader.load_item()
コード例 #27
0
    def parse_product(self, response):
        """Yield a firmware item for every ``.npk`` / ``.lzb`` link on the page.

        The product name is the part of the file name before its last "-".
        The date and version are searched for in the text of the whole page.

        Yields:
            One loaded firmware item per matching link; nothing when the page
            has no matching link.
        """
        firmware_links = [
            href for href in response.xpath("//a/@href").extract()
            if href.endswith((".npk", ".lzb"))
        ]
        if not firmware_links:
            return

        # The page text is identical for every link, so extract it only once.
        text = response.xpath("//text()").extract()

        for href in firmware_links:
            basename = href.split("/")[-1]
            product, dash, _ = basename.rpartition("-")
            if not dash:
                # No "-" in the file name: slicing up to rfind() == -1 used to
                # chop off the last character. Fall back to the name without
                # its extension instead.
                product = basename.rsplit(".", 1)[0]

            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%Y-%b-%d"])
            item.add_value("date", item.find_date(text))
            item.add_value("url", href)
            item.add_value("product", product)
            item.add_value("vendor", self.name)
            item.add_value("version",
                           FirmwareLoader.find_version_period(text))
            yield item.load_item()
コード例 #28
0
ファイル: phicomm.py プロジェクト: harry1080/IoT-vulhub
    def parse_product(self, response):
        """Yield one firmware item for the ``.bin`` download found on the page.

        The first ``/cn/Uploads/files/....bin`` path in the response body is
        turned into an absolute phicomm.com URL. Product, date, version and
        description are taken from ``response.meta``.

        Yields:
            A single loaded firmware item, or nothing (with a warning) when
            the page contains no such download path.
        """
        import logging
        import re

        # Example path: /cn/Uploads/files/20161024/K1_V22.4.2.15.bin
        match = re.search(r"(/cn/Uploads/files/.*?\.bin)", response.text)
        if match is None:
            # Previously an IndexError; a page without a download is skipped.
            logging.warning("No firmware download path found on URL: %s",
                            response.url)
            return

        url = "http://www.phicomm.com/{}".format(match.group(1))

        item = FirmwareLoader(item=FirmwareImage())
        item.add_value("url", url)
        item.add_value("product", response.meta['product'])
        item.add_value("date", response.meta['date'])
        item.add_value("version", response.meta['version'])
        item.add_value("vendor", self.vendor)
        item.add_value("description", response.meta['description'])

        yield item.load_item()
コード例 #29
0
ファイル: pfsense.py プロジェクト: harry1080/IoT-vulhub
    def parse(self, response):
        """Walk a directory listing, following folders and yielding firmware.

        Every anchor on the page is classified by its href:

        * hrefs containing ".." are ignored;
        * hrefs ending in "/" are requested with this same callback;
        * hrefs ending in ".gz" that do not contain ".iso" produce an item
          whose product and version come from the dash-separated link text.

        Anchors without any text or href are skipped.

        Yields:
            ``Request`` objects for sub-directories and loaded firmware items
            for matching archives.
        """
        for link in response.xpath("//a"):
            texts = link.xpath(".//text()").extract()
            hrefs = link.xpath(".//@href").extract()
            # Anchors with no label or no target cannot be classified; skip
            # them instead of failing with IndexError on the whole page.
            if not texts or not hrefs:
                continue
            text, href = texts[0], hrefs[0]

            if ".." in href:
                continue

            if href.endswith('/'):
                yield Request(url=urlparse.urljoin(response.url, href),
                              headers={"Referer": response.url},
                              callback=self.parse)
                continue

            if not href.endswith(".gz") or ".iso" in href:
                continue

            # strip off multiple file extensions
            basename = os.path.splitext(text)[0]
            while ".img" in basename or ".iso" in basename:
                stripped = os.path.splitext(basename)[0]
                if stripped == basename:
                    # splitext cannot shorten the name any further (e.g. a
                    # name that starts with ".img"); avoid looping forever.
                    break
                basename = stripped

            parts = basename.split("-")
            base_version = FirmwareLoader.find_version_period(parts)

            # attempt to parse filename and generate product/version strings
            version_bits = [base_version] if base_version else []
            remove = [base_version] if base_version else []
            for i, part in enumerate(parts):
                if "BETA" in part:
                    # The component after the BETA tag is glued onto it; it
                    # may be missing when the tag is the last component.
                    following = parts[i + 1] if i + 1 < len(parts) else ""
                    version_bits.append(part + following)
                    remove.append(part)
                    if following:
                        remove.append(following)
                elif "RC" in part:
                    version_bits.append(part)
                    remove.append(part)
                elif "RELEASE" in part:
                    remove.append(part)

            # Joining avoids "None += str" when no numeric version was found.
            version = "-".join(version_bits) if version_bits else None
            product_parts = [x for x in parts if x not in remove]

            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%d-%b-%Y"])
            item.add_value("version", version)
            item.add_value("url", href)
            item.add_value(
                "date",
                item.find_date(link.xpath("following::text()").extract()))
            item.add_value("product", "-".join(product_parts))
            item.add_value("vendor", self.name)
            yield item.load_item()
コード例 #30
0
ファイル: netgear.py プロジェクト: isundaylee/scraper
    def parse_product(self, response):
        """Collect the firmware downloads listed on a product support page.

        The visible download section (``#topicsdownload`` without the
        ``hidea`` class) is located and each ``.accordion-item`` in it is
        inspected. Entries whose title contains "Firmware" or "firmware"
        become ``FirmwareImage`` results. The number of download sections
        seen per page is tallied in ``self.outer_count_map``.

        Returns:
            A list of ``FirmwareImage`` objects, or ``None`` when the page has
            no download section or no download items.
        """
        outers = response.css('#topicsdownload:not(.hidea)')
        outer_count = len(outers)
        self.outer_count_map[outer_count] = (
            self.outer_count_map.get(outer_count, 0) + 1)

        if outer_count == 0:
            logging.warning('Cannot find download section on URL: %s',
                            response.request.url)
            return
        if outer_count > 1:
            # Only warn for genuine duplicates; the old "> 0" test fired on
            # every page that had exactly one download section.
            logging.warning('Duplicate download sections present on URL: %s'
                            '. Picking the first.', response.request.url)

        items = outers[0].css('.accordion-item')
        if not items:
            logging.warning('No download items found on URL: %s',
                            response.request.url)
            return

        results = []
        for item in items:
            title = item.css('.accordion-title h1')[0].xpath(
                "text()").extract()[0]
            link = item.css('.accordion-content a')[0].xpath(
                "@href").extract()[0]

            # Match on the decoded title so the check does not depend on how
            # the interpreter compares text with encoded bytes.
            if "Firmware" not in title and "firmware" not in title:
                logging.warning('Skipping non-firmware download: %s', title)
                continue

            result = FirmwareImage()
            result['product'] = response.css('.model .product-code').xpath(
                "text()").extract()[0].strip()
            result['vendor'] = 'Netgear'

            # The description is still stored UTF-8 encoded, as before.
            result['description'] = title.encode('utf-8')
            result['url'] = link

            results.append(result)

        return results