Example #1
    def test_parseImgLinks(self):
        httpretty.enable()
        resources_dir = os.path.dirname(os.path.abspath(__file__))
        for resource in self._resources:
            with open(os.path.join(HTTP_RESOURCES_PATH, resource["file"])) as html:
                httpretty.register_uri(httpretty.GET, resource["site"], body=html.read())
                imgs = ImageScrapy(resource["site"])
                links = imgs.parseImgLinks()
                self.assertSetEqual(links, resource["result"], "Test HTML page link parsing")

        httpretty.disable()
        httpretty.reset()
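
The test above relies on two fixtures that do not appear in the excerpt: the HTTP_RESOURCES_PATH constant (note that the resources_dir computed at the top of the test is never actually used) and the self._resources list of cases. A minimal sketch of what that setup might look like, with hypothetical file names, URLs, and expected links:

import os
import unittest

# Assumed module-level constant; the real project may define it elsewhere or differently.
HTTP_RESOURCES_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "resources")

class TestImageScrapy(unittest.TestCase):
    def setUp(self):
        # Each case pairs a canned HTML file with the URL it stands in for
        # and the set of image links parseImgLinks() is expected to return.
        self._resources = [
            {
                "file": "example.html",                     # hypothetical fixture file
                "site": "http://example.com/",              # URL that httpretty will mock
                "result": {"http://example.com/logo.png"},  # expected parsed links
            },
        ]
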
Example #2
def site_scrape(site, depth=1):
    """
    Takes a URL and scrapes all the images
    """
    click.echo(" - About to scrape images from: {0}".format(site))
    scrap_images = ImageScrapy(site)
    dirName = scrap_images.filePath()
    img_list = scrap_images.parseImgLinks(depth)
    if img_list:
        click.echo("   Scraping collected images:")
        scrap_images.downloadImages(dirName, img_list)
    else:
        click.echo("   No images found on this page")
Example #3
    def test_parseImgLinks(self):
        httpretty.enable()
        resources_dir = os.path.dirname(os.path.abspath(__file__))
        for resource in self._resources:
            with open(os.path.join(HTTP_RESOURCES_PATH,
                                   resource['file'])) as html:
                httpretty.register_uri(httpretty.GET,
                                       resource['site'],
                                       body=html.read())
                imgs = ImageScrapy(resource['site'])
                links = imgs.parseImgLinks()
                self.assertSetEqual(links, resource['result'],
                                    "Test HTML page link parsing")

        httpretty.disable()
        httpretty.reset()
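
The manual httpretty.enable()/disable()/reset() calls in the test above can also be handled by httpretty's @httpretty.activate decorator. A trimmed sketch of the same idea inside the test class; the URL, HTML body, and method name are illustrative only:

    @httpretty.activate  # enables httpretty for this test and resets it afterwards
    def test_parseImgLinks_single_page(self):  # hypothetical variant of the test above
        httpretty.register_uri(httpretty.GET, "http://example.com/",
                               body="<html><img src='logo.png'></html>")
        links = ImageScrapy("http://example.com/").parseImgLinks()
        self.assertTrue(links)  # how ImageScrapy normalizes relative srcs is not shown here
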
Example #4
def site_scrape(site, depth=1):
    """
    Takes a URL and scrapes all the images
    """
    click.echo(" - About to scrape images from: {0}".format(site))
    scrap_images = ImageScrapy(site)
    dirName = scrap_images.filePath()
    img_list = scrap_images.parseImgLinks(depth)
    if img_list:
        click.echo("   Scraping collected images:")
        scrap_images.downloadImages(dirName, img_list)
    else:
        click.echo("   No images found on this page")
    def test_instantiation(self):
        self.assertIsInstance(ImageScrapy(self._resources[0]['site']),
                              ImageScrapy, "Test ImageScrapy instance")