def setUp(self):
    file_urls = 'C:/Users/brian.dsouza/Pictures/image_downloader/fileurls.txt'
    destination_folder = 'C:/Users/brian.dsouza/Pictures/image_downloader/Images'
    self.downloader_1 = ImageDownloader(file_urls=file_urls,
                                        destination=destination_folder)
    self.downloader_2 = ImageDownloader(file_urls=file_urls,
                                        destination=destination_folder)
def main():
    url = get_url()
    os_type = get_os()
    finder = ChromedriverFinder(os_type)
    driver_loc = finder.find_chromedriver()
    downloader = ImageDownloader(url, driver_loc)
    downloader.run()
def main():
    '''Creates the parser to verify that arguments are well formatted.'''
    parser = argparse.ArgumentParser(
        description='Download requested images from google images based on a search query.')
    parser.add_argument('search', nargs='+',
                        help='The search query to be fetched')
    parser.add_argument('limit', type=int, nargs='+',
                        help='The maximum quantity of results to be downloaded')

    # Get the arguments passed on the command line
    arguments = parser.parse_args()

    # Assign the arguments passed to variables
    search = arguments.search[0]
    limit = arguments.limit[0]

    downloader = ImageDownloader()
    downloader.search_images(search, limit)
class TestImageDownloader(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        pass

    @classmethod
    def tearDownClass(cls):
        pass

    def setUp(self):
        file_urls = 'C:/Users/brian.dsouza/Pictures/image_downloader/fileurls.txt'
        destination_folder = 'C:/Users/brian.dsouza/Pictures/image_downloader/Images'
        self.downloader_1 = ImageDownloader(file_urls=file_urls,
                                            destination=destination_folder)
        self.downloader_2 = ImageDownloader(file_urls=file_urls,
                                            destination=destination_folder)

    def tearDown(self):
        self.downloader_1 = None
        self.downloader_2 = None

    def test_download_image(self):
        """Mock requests.get so no real download takes place."""
        with patch('image_downloader.requests.get') as mocked_get:
            with patch('builtins.open', unittest.mock.mock_open()):
                # Mock a successful response
                mocked_get.return_value.ok = True
                mocked_get.return_value.content = b'imagevalue'

                output_1 = self.downloader_1.download_image('http://company.com/image1.png')
                mocked_get.assert_called_with('http://company.com/image1.png', timeout=10)
                self.assertEqual(output_1, 'Download Success')

                # Mock a failed response
                mocked_get.return_value.ok = False

                output_2 = self.downloader_2.download_image('http://company.com/image1.png')
                mocked_get.assert_called_with('http://company.com/image1.png', timeout=10)
                self.assertEqual(output_2, 'Download Failed')

    def test_download_images(self):
        with self.assertRaises(FileNotFoundError):
            self.downloader_1.file_urls = "wrongurl"
            self.downloader_1.download_images()
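# The tests above imply a contract for this ImageDownloader: download_image calls
# requests.get(url, timeout=10), writes response.content on success, and returns
# 'Download Success' / 'Download Failed'; download_images reads URLs from the
# file_urls file and raises FileNotFoundError when it is missing. The following
# is a minimal sketch under those assumptions, not the project's actual code;
# the file-naming scheme (basename of the URL) is a guess.
import os
import requests

class ImageDownloader:
    def __init__(self, file_urls, destination):
        self.file_urls = file_urls      # text file with one URL per line
        self.destination = destination  # folder the images are written to

    def download_image(self, url):
        response = requests.get(url, timeout=10)
        if not response.ok:
            return 'Download Failed'
        # Derive the output file name from the URL (assumed convention)
        filename = os.path.join(self.destination, url.rsplit('/', 1)[-1])
        with open(filename, 'wb') as image_file:
            image_file.write(response.content)
        return 'Download Success'

    def download_images(self):
        # Raises FileNotFoundError for a bad file_urls path, as the test expects
        with open(self.file_urls) as url_file:
            for line in url_file:
                self.download_image(line.strip())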
def download_and_classify_in_batches(complete_links_list, classifier):
    print("Total amount of images to be downloaded and classified: %d" %
          len(complete_links_list))

    for index in range(0, len(complete_links_list), BATCH_SIZE):
        time_start = time.time()
        print("Downloading and classifying batch: %d -> %d" %
              (index, index + BATCH_SIZE))

        links_batch = complete_links_list[index:index + BATCH_SIZE]
        tensor_images = ImageDownloader.download_images(links_batch,
                                                        NUM_DOWNLOAD_THREADS)

        if len(tensor_images) == 0:
            print("Skipping classification of empty list")
            continue

        results = classifier.classify_image_tensors(tensor_images)
        results_df = DataHandler.convert_classification_result_to_dataframe(results)
        DataHandler.write_classification_result(results_df,
                                                PARQUET_FILE_OUTPUT_LOCATION)

        duration = time.time() - time_start
        print("Duration of downloading and classification for batch: %.2f" % duration)
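# A side note on the batching slice above: Python clamps out-of-range slices, so
# the final batch is simply shorter than BATCH_SIZE rather than raising an error.
links = list(range(10))
print(links[8:8 + 5])   # [8, 9] -- the short last batch, no IndexError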
def createDir(self):
    c = 0
    onerror = False
    for full_uri in self.full_uris:
        mkdir = self.saved_path + "/uri_" + str(c)
        access_rights = 0o755
        print("Creating directory at %s" % mkdir)
        try:
            if not os.path.exists(mkdir):
                os.makedirs(mkdir, access_rights)
                print("Successfully created the directory %s" % mkdir)
            else:
                print("Directory at %s already exists!\n" % mkdir)
                onerror = True
        except OSError:
            print("Failed to create directory at %s\n" % mkdir)
            break

        url = os.path.split(full_uri)[0]
        if len(url) < 6:
            url = full_uri
            url_path = ""
        else:
            url_path = "/" + os.path.split(full_uri)[1]

        ImageDownloader(url, url_path, mkdir, onerror)
        print("")
        c += 1
    print("Done.")
def crawl(keyword, n_scroll, engine='baidu'):
    # ---------------------------------------------------
    # Basic settings for ImageCrawler and ImageDownloader
    # ---------------------------------------------------
    print(SEP + 'Basic settings for ImageCrawler and ImageDownloader\n' + SEP)
    keywordHash = get_md5(keyword)
    link_save_dir = os.path.join(os.getcwd(), '..', 'data', 'links', keywordHash)
    image_save_dir = os.path.join(os.getcwd(), '..', 'data', 'images', keywordHash)
    print('Keyword:', keyword)
    print('Number of scrolls:', n_scroll)
    print('Links saved in:', link_save_dir)
    print('Images saved in:', image_save_dir)
    print()

    # ----------------------------------
    # Save images' links by ImageCrawler
    # ----------------------------------
    print(SEP + "Save images' links by ImageCrawler\n" + SEP)

    # Search for images on the chosen engine (Baidu by default)
    links_name = '%s_links.csv' % engine
    ic = ImageCrawler(engine)
    ic.run(keyword, n_scroll)
    ic.save_links(link_save_dir, links_name)
    print("Images' links are saved in: " + link_save_dir + '\n')

    # ------------------------------
    # Save images by ImageDownloader
    # ------------------------------
    print(SEP + 'Save images by ImageDownloader\n' + SEP)

    # Download images to the image directory
    ider = ImageDownloader(link_save_dir)
    ider.run(image_save_dir)
    print('Images are saved in: ' + image_save_dir + '\n')
def download_image(photo_video_download_url, count):
    downloaded_photo = ImageDownloader.get_image_from_link(photo_video_download_url)
    if downloaded_photo is None:
        return None

    count.add(1)
    buffer = io.BytesIO()
    downloaded_photo.save(buffer, format='PNG')
    return buffer.getvalue()
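# get_image_from_link is not shown above, but the .save(..., format='PNG') call
# implies it returns a PIL image, or None on failure. A minimal sketch under that
# assumption; the requests usage and the 10-second timeout are guesses.
import io
import requests
from PIL import Image

def get_image_from_link(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return Image.open(io.BytesIO(response.content))
    except (requests.RequestException, OSError):
        # OSError also covers PIL failing to decode the payload
        return None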
def job():
    data_filename = 'data.csv'
    image_filename = 'image.jpg'
    absolute_path = str(pathlib.Path(__file__).parent.absolute())
    image_filename_absolute_path = absolute_path + '/' + image_filename

    if os.path.exists(data_filename):
        os.remove(data_filename)

    process = CrawlerProcess()
    process.crawl(Scrapper)
    process.start()

    data_parsed = parse_data(data_filename)
    sample = data_parsed.sample()

    image_downloader = ImageDownloader(sample['url_image'].item(), image_filename)
    if image_downloader.successfull_download:
        set_gnome_wallpaper(image_filename_absolute_path)
    else:
        print('Error downloading, cannot set gnome wallpaper')
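# set_gnome_wallpaper is not defined in the snippet. A minimal sketch, assuming
# the standard GNOME gsettings key; the function name matches the call above but
# the body is illustrative, not the author's implementation.
import subprocess

def set_gnome_wallpaper(image_path):
    # picture-uri expects a file:// URI built from an absolute path
    subprocess.run(
        ['gsettings', 'set', 'org.gnome.desktop.background',
         'picture-uri', 'file://' + image_path],
        check=True)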
def edit(request, wiki_id):
    """Edit an existing article."""
    current_page = 'user_wiki'
    title = 'Edit article'

    try:
        wiki_id = int(wiki_id)
    except ValueError:
        raise Http404()

    try:
        wiki = Entry.objects.get(id=wiki_id, author=request.user)
    except Entry.DoesNotExist:
        raise Http404()

    # Handle GET requests
    if request.method == 'GET':
        form = WikiForm(instance=wiki)
        return render('wiki_add.html', locals(),
                      context_instance=RequestContext(request))

    # Handle POST requests
    form = WikiForm(request.POST)
    if form.is_valid():
        data = form.cleaned_data
        wiki.title = data['title']
        wiki.content = data['content']
        wiki.source = data['source'] or 'http://pythoner.net/home/%d/' % request.user.id

        try:
            wiki.save()
        except Exception as e:
            messages.error(request, 'Error while saving the article: %s' % e)
            return HttpResponseRedirect('/home/wiki/')
        else:
            messages.success(request, 'Updated successfully!')

        # Start the background threads for tagging and image downloading
        TagingThread(wiki_object=wiki).start()
        ImageDownloader(wiki).start()

        return HttpResponseRedirect('/wiki/%d/' % wiki.id)
def add(request):
    """Create a new article."""
    current_page = 'user_wiki'
    title = 'Write a new note'

    # Handle GET requests
    if request.method == 'GET':
        form = WikiForm()
        return render('wiki_add.html', locals(),
                      context_instance=RequestContext(request))

    # Handle POST requests
    form = WikiForm(request.POST)
    if form.is_valid():
        data = form.cleaned_data
        new_wiki = Entry()
        new_wiki.author = request.user
        new_wiki.title = data['title']
        new_wiki.content = data['content']
        new_wiki.source = data['source'] or 'http://pythoner.net/home/%d/' % request.user.id

        try:
            new_wiki.save()
        except Exception as e:
            return HttpResponse('Error while saving the article: %s' % e)
        else:
            # Start the thread that tags the article
            TagingThread(wiki_object=new_wiki).start()
            # Start the thread that downloads the article's images
            ImageDownloader(new_wiki).start()

            # Send the "new wiki was posted" signal
            new_wiki_was_post.send(sender=new_wiki.__class__, wiki=new_wiki)

        return HttpResponseRedirect('/wiki/%d/' % new_wiki.id)
def test_get_image_returns_image_content(self):
    name, image = ImageDownloader.get_image('http://domain.com/image.png')

    assert image == self.response.content
    assert name == 'image.png'
from image_downloader import ImageDownloader

if __name__ == '__main__':
    file_urls = 'C:/Users/brian.dsouza/Pictures/image_downloader/fileurls.txt'
    destination_folder = 'C:/Users/brian.dsouza/Pictures/image_downloader/Images'
    image_downloader = ImageDownloader(file_urls=file_urls,
                                       destination=destination_folder)
    image_downloader.download_images()
def test_file_is_saved(self):
    ImageDownloader.save('image.png', 'image data')

    self.assert_file_saved('image.png', 'image data')
def test_suffix_is_added_to_file_name_if_already_exists(self):
    self.is_file_mock.side_effect = [True, True, True, False]

    ImageDownloader.save('image.png', 'image data')

    self.assert_file_saved('image-3.png', 'image data')
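# The two tests above pin down ImageDownloader.save: write under the given name,
# but append an incrementing -N suffix while a file with that name already exists
# (three collisions yield image-3.png). A minimal sketch under those assumptions;
# the real method's path handling may differ.
from pathlib import Path

def save(file_name, data):
    path = Path(file_name)
    candidate = path
    counter = 0
    # Keep bumping the suffix until a free name is found
    while candidate.is_file():
        counter += 1
        candidate = path.with_name('%s-%d%s' % (path.stem, counter, path.suffix))
    with open(candidate, 'w') as out_file:
        out_file.write(data)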
def test_download_images(self):
    ImageDownloader.download(['http://domain.com/image.png'])

    self.assert_file_saved('image.png', 'image content data')
import io
import sys

import torch

CLASSES_LOCATION = '../scripts/classifier/imagenet_classes.json'
PARQUET_FILE_INPUT_LOCATION = "/home/corneliu/flickr.parquet"
PARQUET_FILE_OUTPUT_LOCATION = "/home/corneliu/classification_result.parquet"

CUDA = True
SHOULD_USE_REDUCED_SAMPLED = False
SAMPLE_SIZE = 50
MAX_LABELS = 5
NUM_WORKERS = 1
NUM_DOWNLOAD_THREADS = 8
BATCH_SIZE = 250

links_row = DataHandler.get_unprocessed_links(PARQUET_FILE_INPUT_LOCATION, "saf")[:20]

# download_links = [row.photo_video_download_url for row in links]
# print(download_links)

for row in links_row:
    image_id, download_url, image_as_tensor = ImageDownloader.download_and_preprocess_image(row)

    if image_as_tensor is None:
        continue

    buffer = io.BytesIO()
    torch.save(image_as_tensor, buffer)
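# torch.save writes a pickle stream, so the buffer built above can be turned back
# into a tensor (or raw bytes for storage) later. A short illustrative round-trip,
# assuming the same torch version on both ends; the zeros tensor stands in for
# image_as_tensor.
import io
import torch

buffer = io.BytesIO()
torch.save(torch.zeros(3, 224, 224), buffer)   # stand-in for image_as_tensor
serialized = buffer.getvalue()                 # raw bytes, e.g. for a parquet column
restored = torch.load(io.BytesIO(serialized))  # recovers the tensor unchanged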
# # Search for images on Bing
# engine = 'bing'
# bing_links_name = 'bing_links.csv'
# bing_ic = ImageCrawler(engine)
# bing_ic.run(keyword, n_scroll)
# bing_ic.save_links(link_save_dir, bing_links_name)

# Search for images on Google
engine = 'google'
google_links_name = 'google_links.csv'
google_ic = ImageCrawler(engine)
google_ic.run(keyword, n_scroll)
google_ic.save_links(link_save_dir, google_links_name)
print("Images' links are saved in: " + link_save_dir + '\n')

# ------------------------------
# Save images by ImageDownloader
# ------------------------------
print(SEP + 'Save images by ImageDownloader\n' + SEP)

# Download images to the image directory
ider = ImageDownloader(link_save_dir)
ider.run(image_save_dir)
print('Images are saved in: ' + image_save_dir + '\n')
def test_get_image_raises_download_error_if_response_code_not_200(self):
    self.response.status_code = 404

    with self.assertRaises(DownloadError):
        ImageDownloader.get_image('http://domain.com/image.png')
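# Taken together, the get_image/download tests in this section constrain the API:
# get_image returns (file name, raw content) for a 200 response and raises
# DownloadError otherwise, and download() fetches then saves each URL. A minimal
# sketch under those assumptions; DownloadError is assumed to be a project-defined
# exception, and these would be static methods on the real ImageDownloader.
import requests

class DownloadError(Exception):
    pass

def get_image(url):
    response = requests.get(url)
    if response.status_code != 200:
        raise DownloadError('Could not download %s' % url)
    # File name is the last path segment of the URL ('image.png' above)
    return url.rsplit('/', 1)[-1], response.content

def download(urls):
    for url in urls:
        name, content = get_image(url)
        save(name, content)   # see the save() sketch earlier in this section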