Example #1
0
    def download(self, keyword, timeout=30):
        """Download every image found for ``keyword`` into ``<self.path>/<keyword>``.

        Links are collected through ``Links().collect(keyword)``; each one is
        fetched, written to a zero-padded, index-named file and then checked
        with ``self.validate``. Files that fail validation are deleted, and
        files whose detected extension differs from the one derived from the
        link are renamed to the detected extension.

        A failure on one link is reported on stdout and does not stop the
        remaining downloads.

        Args:
            keyword: Search term; also used as the sub-directory name.
            timeout: Seconds passed to ``requests.get`` as its ``timeout``
                argument, so a stalled server cannot block the loop forever.
        """
        source = Links()
        print('Collecting downloadable links of {}...'.format(keyword))
        links = source.collect(keyword)

        print(
            'Downloading images of {} from collected links...'.format(keyword))
        self.mkdir('{}/{}'.format(self.path, keyword))
        n_links = len(links)
        for index, link in enumerate(links):
            try:
                print(
                    'Downloading this image based on the keyword {} from {}: {}/{}'
                    .format(keyword, link, index + 1, n_links))
                response = requests.get(link, stream=True, timeout=timeout)
                try:
                    # Do not write HTTP error pages (4xx/5xx) to disk as images.
                    response.raise_for_status()
                    ext = self.get_extension(link)
                    raw_path = '{}/{}/{}'.format(self.path, keyword,
                                                 str(index).zfill(4))
                    path = raw_path + '.' + ext
                    self.save(response, path)
                finally:
                    # With stream=True the connection is held until the body is
                    # consumed or the response is closed; release it even when
                    # saving fails.
                    response.close()

                print("Validating image file")
                # validate() appears to return the detected extension, or None
                # when the file cannot be read as an image.
                ext2 = self.validate(path)
                if ext2 is None:
                    print('Unreadable file - {}'.format(link))
                    os.remove(path)
                elif ext != ext2:
                    path2 = raw_path + '.' + ext2
                    os.rename(path, path2)
                    print('Renaming extension {} -> {}'.format(ext, ext2))
            except Exception as e:
                # Best-effort batch download: report the failure and move on
                # to the next link.
                print('Download failed.', e)