def test_download_image_default(self):
    """Download images without count/size arguments and verify the result.

    Expects exactly 100 regular files in the test directory, each of
    which ``imread`` can load (it must not return ``None``).
    """
    downloader = Downloader()
    downloader.download_images('ネコ', TestDownloadImage.TEST_DIR_NAME)

    # Keep regular files only; anything else in the directory is ignored.
    downloaded = []
    for entry in Path(TestDownloadImage.TEST_DIR_NAME).glob('*'):
        if entry.is_file():
            downloaded.append(entry)

    self.assertEqual(100, len(downloaded))
    for image_path in downloaded:
        loaded = imread(str(image_path))
        self.assertIsNotNone(loaded)
class DownloadWidget(MouseEventMixin, QWidget):
    """Widget showing a label and a progress bar while images are downloaded.

    The download runs on a daemon worker thread started by
    ``start_download``. Progress values reported by the ``Downloader``
    callback are re-emitted through a private Qt signal that is connected
    to ``on_progress_download``, which updates the progress bar.

    Signals:
        complete_download: emitted from the worker thread once
            ``Downloader.download_images`` has returned.
    """

    # Private (name-mangled) signal carrying the latest progress value.
    __progress_download = Signal(int)
    complete_download = Signal()

    def __init__(self, download_keyword, dirname, parent=None):
        """Store the download parameters, create the downloader and build the UI.

        Args:
            download_keyword: keyword forwarded to ``download_images``.
            dirname: destination directory forwarded to ``download_images``.
            parent: optional parent passed to the ``QWidget`` initializer.
        """
        super(DownloadWidget, self).__init__()
        # NOTE(review): this second call resumes the MRO *after*
        # MouseEventMixin so that ``parent`` reaches the next initializer --
        # presumably because the mixin does not forward it; confirm before
        # simplifying.
        super(MouseEventMixin, self).__init__(parent)

        self.__download_keyword = download_keyword
        self.__dirname = dirname
        self.__progress_bar = None  # assigned in init_ui()
        self.__downloader = Downloader(self.progress_download_callback)

        self.__progress_download.connect(self.on_progress_download)

        self.init_ui()

    def init_ui(self):
        """Create the centered label and the progress bar in a vertical layout."""
        label = QLabel('カプチーノを入れています...', self)
        label.setAlignment(Qt.AlignCenter)

        pbar = QProgressBar(self)
        pbar.setRange(0, DOUNLOAD_COUNT)
        pbar.setTextVisible(False)

        vbox = QVBoxLayout(self)
        vbox.addWidget(label)
        vbox.addWidget(pbar)
        vbox.setContentsMargins(QMargins(16, 16, 16, 16))
        self.setLayout(vbox)

        self.__progress_bar = pbar

    def start_download(self):
        """Start the download on a daemon thread and return immediately.

        ``complete_download`` is emitted from that thread after
        ``download_images`` returns.
        """
        def _inner(keyword, dirname):
            self.__downloader.download_images(keyword, dirname,
                                              DOUNLOAD_COUNT, MIN_SIZE)
            self.complete_download.emit()

        th = Thread(target=_inner,
                    args=(self.__download_keyword, self.__dirname))
        # Use the ``daemon`` attribute: Thread.setDaemon() is deprecated
        # since Python 3.10 and emits a DeprecationWarning.
        th.daemon = True
        th.start()

    def progress_download_callback(self, progress):
        """Callback handed to ``Downloader``; re-emits ``progress`` as a signal."""
        self.__progress_download.emit(progress)

    def on_progress_download(self, progress):
        """Slot for the private progress signal: set the progress bar value."""
        self.__progress_bar.setValue(progress)
def test_download_image(self):
    """Download with an explicit count and minimum size, then verify the files.

    Calls ``download_images`` with a count of 10 and a
    ``(min width, min height)`` tuple of ``(400, 300)``. Checks that
    exactly 10 regular files exist in the test directory and that each
    one loads via ``imread`` with width and height at or above the minimum.
    """
    expected_count = 10
    min_width, min_height = 400, 300

    downloader = Downloader()
    downloader.download_images(
        'ネコ',
        TestDownloadImage.TEST_DIR_NAME,
        expected_count,
        (min_width, min_height),
    )

    # Keep regular files only; anything else in the directory is ignored.
    downloaded = list(filter(
        lambda entry: entry.is_file(),
        Path(TestDownloadImage.TEST_DIR_NAME).glob('*'),
    ))
    self.assertEqual(expected_count, len(downloaded))

    for image_path in downloaded:
        pixels = imread(str(image_path))
        self.assertIsNotNone(pixels)
        # The shape is unpacked as (height, width, <ignored third dimension>).
        height, width, _ = pixels.shape
        self.assertGreaterEqual(width, min_width)
        self.assertGreaterEqual(height, min_height)
class DownloaderModel(QObject):
    """QObject model exposing download progress through Qt properties.

    The download runs on a daemon worker thread started by the
    ``start_download`` slot. The ``Downloader`` progress callback writes
    ``prog_value``, which emits ``prog_value_changed`` whenever the value
    actually changes.

    Signals:
        prog_value_changed(int): emitted with the new ``prog_value``.
        prog_max_changed(int): notify signal declared for ``prog_max``;
            nothing in this class emits it.
        download_completed: emitted from the worker thread once
            ``Downloader.download_images`` has returned.
    """

    prog_value_changed = Signal(int)
    prog_max_changed = Signal(int)
    download_completed = Signal()

    def __init__(self, download_keyword, dirname, parent=None):
        """Store the download parameters and create the downloader.

        Args:
            download_keyword: keyword forwarded to ``download_images``.
            dirname: destination directory forwarded to ``download_images``.
            parent: optional parent passed to the ``QObject`` initializer.
        """
        super().__init__(parent)
        self.__prog_value = 0
        self.__download_keyword = download_keyword
        self.__dirname = dirname
        self.__downloader = Downloader(self.progress_download_callback)

    @Property(int, notify=prog_value_changed)
    def prog_value(self):
        """Most recent progress value reported by the downloader (starts at 0)."""
        return self.__prog_value

    @prog_value.setter
    def prog_value(self, value):
        # Emit only on an actual change so listeners are not notified
        # about identical consecutive values.
        if self.__prog_value != value:
            self.__prog_value = value
            self.prog_value_changed.emit(self.__prog_value)

    @Property(int, notify=prog_max_changed)
    def prog_max(self):
        """Upper bound of the progress range; always ``DOUNLOAD_COUNT``."""
        return DOUNLOAD_COUNT

    @Slot()
    def start_download(self):
        """Start the download on a daemon thread and return immediately.

        ``download_completed`` is emitted from that thread after
        ``download_images`` returns.
        """
        def _inner(keyword, dirname):
            self.__downloader.download_images(keyword, dirname,
                                              DOUNLOAD_COUNT, MIN_SIZE)
            self.download_completed.emit()

        # ``daemon=True`` replaces Thread.setDaemon(), which is deprecated
        # since Python 3.10 and emits a DeprecationWarning.
        th = Thread(target=_inner,
                    args=(self.__download_keyword, self.__dirname),
                    daemon=True)
        th.start()

    def progress_download_callback(self, progress):
        """Callback handed to ``Downloader``; stores ``progress`` in ``prog_value``."""
        self.prog_value = progress
# fixed_ = [v for k,v in product_data.items() if not k == 'info'] + \ # [v for k,v in product_data['info'].items() if not k == 'images'] + \ # [product_hash] # product_data_ = [fixed_ + [image_url] for image_url in product_data['info']['images']] # products_data[i:i+len(product_data_)] = product_data_ # i += len(product_data_) products_df['image_hash'] = products_df['image_url'].apply( lambda x: hashlib.md5(x.encode('utf-8')).hexdigest()) if not os.path.exists(args.image_fd): os.mkdir(args.image_fd) from downloader import Downloader downloader = Downloader() downloader.download_images(products_df.image_url.values, args.image_fd, max_workers=args.max_workers) image_hashes = list(map(lambda x: x.split('.')[0], os.listdir(args.image_fd))) products_df = products_df[products_df.image_hash.isin(image_hashes)].copy() products_df['attributes'] = '{}' products_df['source'] = 'vip' products_df['product0street1'] = 0 products_df = products_df.rename(index=str, columns={ 'width': 'w', 'height': 'h', 'image_hash': 'orig_name' }).copy() df_diff, df_same = [ x for _, x in products_df.groupby(
import sys

from downloader import Downloader

if __name__ == "__main__":
    # The filename is the single required command-line argument; it is
    # passed to Downloader, whose download_images() is then invoked.
    try:
        file_name = sys.argv[1]
    except IndexError:
        print("Must provide one argument as the filename.")
        # Stop here with a non-zero status: falling through would raise
        # NameError because ``file_name`` was never bound.
        sys.exit(1)

    d = Downloader(file_name=file_name)
    d.download_images()