Beispiel #1
0
    def grab(path, image_url, file_name=None, timeout=30):
        """Download one image and hand it to ``write_file`` under ``path``.

        The file extension is derived from the subtype of the response's
        ``Content-Type`` header (``image/png`` -> ``.png``) and appended to
        ``file_name``.

        :param path: directory the file path is built in.
        :param image_url: address of the image to fetch.
        :param file_name: base name without extension; when empty, the last
            ``/``-separated segment of ``image_url`` is used.
        :param timeout: seconds passed to ``requests.get`` so a stalled
            server cannot block the crawl forever.
        :returns: ``False`` when the response has no content type or its
            media type is not in ``ImageLoader.ALLOWED_TYPES``; ``None``
            otherwise (including when a request error was logged).
        """
        try:
            if not file_name:
                file_name = image_url.split('/')[-1]
            # A timeout error is a RequestException in requests, so it is
            # handled below -- assuming ``exceptions`` is
            # ``requests.exceptions`` (TODO confirm against the imports).
            response = requests.get(image_url, timeout=timeout)

            # Use .get(): a response without the header must be rejected,
            # not escape this function as an uncaught KeyError.
            content_type = response.headers.get('content-type')
            if not content_type:
                return False

            # Drop optional parameters ("image/jpeg; charset=binary") so
            # both the allow-list check and the extension use the bare
            # media type.
            media_type = content_type.split(';', 1)[0].strip()
            if media_type not in ImageLoader.ALLOWED_TYPES:
                return False

            extension = media_type.split('/')[-1]
            file_name = '%s.%s' % (file_name, extension)
            file_path = os.path.join(path, file_name)

            write_file(file_path, response.content)
        except exceptions.RequestException as error:
            Logger.record_log('Occurred Exception:', error)
Beispiel #2
0
    def search_and_grab_candy(self, urls=None):
        """Crawl every board page in ``urls`` and grab the images it lists.

        For each board URL the method works in three passes:

        1. page through the pin listing, collecting the ``href`` of every
           ``.pin a.layer-view`` link until a page yields no links;
        2. open each collected link and read the ``src`` of its
           ``.zoom-layer img`` element;
        3. pass every image address to ``ImageLoader.grab`` together with
           the directory returned by ``create_image_dir``.

        Progress is reported through ``Logger`` along the way.

        :param urls: iterable of board page URLs; ``None`` means no boards.
        """
        pin_links_js = (
            'return document.querySelectorAll(".pin a.layer-view");')

        boards = [] if urls is None else urls
        for board_url in boards:
            self.driver.get(board_url)
            title = self.driver.find_element_by_css_selector('.board-name')
            board_name = title.text
            target_dir = create_image_dir(
                '%s/%s' % (self.resource_path, board_name))

            # Pass 1: gather detail-page links, one listing page at a time.
            detail_links = []
            anchors = self.driver.execute_script(pin_links_js)
            while len(anchors) > 0:
                detail_links.extend(
                    anchor.get_attribute('href') for anchor in anchors)
                last_pin = self.driver.find_element_by_css_selector(
                    '.pin[data-seq]:last-child')
                # The next page is requested relative to the last pin's
                # data-seq value (presumably "pins after this one" --
                # confirm against the site's paging behaviour).
                params = 'max=%s&limit=20&wfl=1' % str(
                    last_pin.get_attribute('data-seq'))
                self.driver.get('%s?%s' % (board_url, params))
                anchors = self.driver.execute_script(pin_links_js)
                Logger.record_log(
                    'Request root url page %s?%s' % (board_url, params))
                Logger.write_log_file()
            Logger.record_log('Find suburls length: %s' % len(detail_links))

            # Pass 2: resolve each detail page to its image address.
            image_links = []
            for detail_link in detail_links:
                self.driver.get(detail_link)
                zoomed = self.driver.find_element_by_css_selector(
                    '.zoom-layer img')
                image_links.append(zoomed.get_attribute('src'))
                Logger.record_log(
                    'Request sub url page %s' % zoomed.get_attribute('src'))
                Logger.write_log_file()
            Logger.record_log('Find imgurls length: %s' % len(image_links))

            # Pass 3: download everything that was found for this board.
            for image_link in image_links:
                Logger.record_log('Grab: %s' % image_link)
                ImageLoader.grab(target_dir, image_link)

            Logger.write_log_file()