コード例 #1
0
 def setUp(self):
     """Build two identical ImageDownloader fixtures for each test."""
     kwargs = dict(
         file_urls='C:/Users/brian.dsouza/Pictures/image_downloader/fileurls.txt',
         destination='C:/Users/brian.dsouza/Pictures/image_downloader/Images')
     self.downloader_1 = ImageDownloader(**kwargs)
     self.downloader_2 = ImageDownloader(**kwargs)
コード例 #2
0
def main():
    """Locate a chromedriver for the current OS and run the image download."""
    target_url = get_url()
    platform = get_os()
    # Resolve the chromedriver binary for this platform, then hand both the
    # URL and the driver location to the downloader.
    chromedriver_path = ChromedriverFinder(platform).find_chromedriver()
    ImageDownloader(target_url, chromedriver_path).run()
コード例 #3
0
def main():
    """Parse command-line arguments and download images for a search query.

    CLI:
        search  one or more words forming the search query
        limit   maximum number of images to download
    """
    parser = argparse.ArgumentParser(
        description=
        'Download requested images from google images based on a search query.'
    )
    parser.add_argument('search',
                        nargs='+',
                        help='The search query to be fetched')
    parser.add_argument(
        'limit',
        type=int,
        nargs='+',
        help='The maximum quantity of results to be downloaded')

    # Get the arguments passed into the command
    arguments = parser.parse_args()

    # BUG FIX: 'search' accepts several words (nargs='+') but only the first
    # word was used before; join them so multi-word queries are searched whole.
    search = ' '.join(arguments.search)
    limit = arguments.limit[0]

    downloader = ImageDownloader()
    downloader.search_images(search, limit)
コード例 #4
0
class TestImageDownloader(unittest.TestCase):
    """Unit tests for ImageDownloader with mocked HTTP and file access."""

    @classmethod
    def setUpClass(cls):
        pass

    @classmethod
    def tearDownClass(cls):
        pass

    def setUp(self):
        """Create two identical downloader fixtures for every test."""
        kwargs = dict(
            file_urls='C:/Users/brian.dsouza/Pictures/image_downloader/fileurls.txt',
            destination='C:/Users/brian.dsouza/Pictures/image_downloader/Images')
        self.downloader_1 = ImageDownloader(**kwargs)
        self.downloader_2 = ImageDownloader(**kwargs)

    def tearDown(self):
        """Drop fixture references so each test starts clean."""
        self.downloader_1 = self.downloader_2 = None

    def test_download_image(self):
        """download_image returns a status string driven by the HTTP response."""
        with patch('image_downloader.requests.get') as mocked_get, \
                patch('builtins.open',
                      unittest.mock.mock_open()) as mocked_file:

            # Successful response -> 'Download Success'
            mocked_get.return_value.ok = True
            mocked_get.return_value.content = b'imagevalue'
            result = self.downloader_1.download_image(
                'http://company.com/image1.png')
            mocked_get.assert_called_with('http://company.com/image1.png',
                                          timeout=10)
            self.assertEqual(result, 'Download Success')

            # Failed response -> 'Download Failed'
            mocked_get.return_value.ok = False
            result = self.downloader_2.download_image(
                'http://company.com/image1.png')
            mocked_get.assert_called_with('http://company.com/image1.png',
                                          timeout=10)
            self.assertEqual(result, 'Download Failed')

    def test_download_images(self):
        """A bad source-file path makes download_images raise FileNotFoundError."""
        self.downloader_1.file_urls = "wrongurl"
        with self.assertRaises(FileNotFoundError):
            self.downloader_1.download_images()
コード例 #5
0
def download_and_classify_in_batches(complete_links_list, classifier):
    """Download images in BATCH_SIZE chunks and classify each chunk.

    Args:
        complete_links_list: sequence of link rows understood by
            ImageDownloader.download_images.
        classifier: object exposing classify_image_tensors(tensors).

    Every batch's results are written to PARQUET_FILE_OUTPUT_LOCATION via
    DataHandler.
    """
    print("Total amount of images to be downloaded and classified: %d" %
          len(complete_links_list))

    for index in range(0, len(complete_links_list), BATCH_SIZE):
        time_start = time.time()
        print("Downloading and classifying batch: %d -> %d" %
              (index, index + BATCH_SIZE))

        links_batch = complete_links_list[index:index + BATCH_SIZE]
        tensor_images = ImageDownloader.download_images(
            links_batch, NUM_DOWNLOAD_THREADS)

        # Nothing could be downloaded for this batch; move on to the next one.
        if not tensor_images:
            # typo fix: "empy" -> "empty"
            print("Skipping classification of empty list")
            continue

        results = classifier.classify_image_tensors(tensor_images)
        results_df = DataHandler.convert_classification_result_to_dataframe(
            results)
        DataHandler.write_classification_result(results_df,
                                                PARQUET_FILE_OUTPUT_LOCATION)

        duration = time.time() - time_start
        # typo fix: "donwloading" -> "downloading"
        print("Duration of downloading and classification for batch: %.2f" %
              duration)
コード例 #6
0
	def createDir(self):
		"""Create one numbered output directory per URI in self.full_uris and
		hand each URI off to ImageDownloader.

		Directories are named ``uri_0``, ``uri_1``, ... under self.saved_path.
		The loop stops early on the first directory that cannot be created.
		"""
		c = 0
		# NOTE(review): onerror is never reset to False inside the loop, so
		# once any directory already exists, every later ImageDownloader call
		# also receives onerror=True — confirm this is intended.
		onerror = False
		for full_uri in self.full_uris:
			mkdir = self.saved_path + "/uri_" + str(c)
			access_rights = 0o755  # rwxr-xr-x
			print ("Creating directory at %s " % mkdir)
			try:
				if not os.path.exists(mkdir):
					os.makedirs(mkdir, access_rights)
					print ("Successfully created the directory %s " % mkdir)
				else:
					print("Directory at %s is already exists!\n" % mkdir)
					onerror = True
			except OSError:
				# Abort the whole run on the first directory we cannot create.
				print("Failed to create directory at %s\n" % mkdir)
				break
			
			# Split the URI into a base URL and a trailing path component;
			# very short split results (< 6 chars) are treated as a bare URL
			# with no path — presumably to handle root URLs; TODO confirm.
			url = os.path.split(full_uri)[0]
			if len(url) < 6:
				url = full_uri
				url_path = ""
			else:
				url_path = "/" + os.path.split(full_uri)[1]
			ImageDownloader(url, url_path, mkdir, onerror)
			print("")
			c += 1
		print("Done.")
コード例 #7
0
ファイル: crawler.py プロジェクト: tjnh05/LeseNet
def crawl(keyword, n_scroll, engine='baidu'):
    """Crawl image links for *keyword* with the given engine, then download
    the linked images.

    Links are stored under data/links/<md5(keyword)>/ and images under
    data/images/<md5(keyword)>/, relative to the parent of the CWD.
    """
    # ---------------------------------------------------
    # Basic settings for ImageCrawler and ImageDownloader
    # ---------------------------------------------------
    print(SEP + 'Basic settings for ImageCrawler and ImageDownloader\n' + SEP)

    keyword_hash = get_md5(keyword)
    data_root = os.path.join(os.getcwd(), '..', 'data')
    link_save_dir = os.path.join(data_root, 'links', keyword_hash)
    image_save_dir = os.path.join(data_root, 'images', keyword_hash)

    print('Keyword:', keyword)
    print('Number of scrolling:', n_scroll)
    print('Links saved in:', link_save_dir)
    print('Images saved in:', image_save_dir)
    print()

    # ----------------------------------
    # Save images' links by ImageCrawler
    # ----------------------------------
    print(SEP + "Save images' links by ImageCrawler\n" + SEP)

    # Run the crawler for the chosen engine and persist the collected links.
    crawler = ImageCrawler(engine)
    crawler.run(keyword, n_scroll)
    crawler.save_links(link_save_dir, '%s_links.csv' % engine)

    print("Images' links are saved in: " + link_save_dir + '\n')

    # ------------------------------
    # Save images by ImageDownloader
    # ------------------------------
    print(SEP + 'Save images by ImageDownloader\n' + SEP)

    # Download every collected link into the image directory.
    ImageDownloader(link_save_dir).run(image_save_dir)

    print('Images are saved in: ' + image_save_dir + '\n')
コード例 #8
0
def download_image(photo_video_download_url, count):
    """Fetch one image, bump the shared counter, and return it as PNG bytes.

    Returns None when the download fails.
    """
    image = ImageDownloader.get_image_from_link(photo_video_download_url)
    if image is None:
        return None

    # Only successful downloads are counted.
    count.add(1)

    # Re-encode the downloaded image as PNG in memory.
    png_buffer = io.BytesIO()
    image.save(png_buffer, format='PNG')
    return png_buffer.getvalue()
コード例 #9
0
def job():
    """Scrape wallpaper candidates, pick one at random, download it and set
    it as the GNOME wallpaper."""
    data_filename = 'data.csv'
    image_filename = 'image.jpg'
    # Absolute path of the downloaded image, next to this script.
    # (pathlib join instead of string '+' concatenation.)
    image_filename_absolute_path = str(
        pathlib.Path(__file__).parent.absolute() / image_filename)
    # Start from a clean slate so the crawler does not append to stale data.
    if os.path.exists(data_filename):
        os.remove(data_filename)
    process = CrawlerProcess()
    process.crawl(Scrapper)
    process.start()
    data_parsed = parse_data(data_filename)
    # Pick one random row from the scraped data.
    sample = data_parsed.sample()
    imageDownloader = ImageDownloader(sample['url_image'].item(),
                                      image_filename)
    if imageDownloader.successfull_download:
        set_gnome_wallpaper(image_filename_absolute_path)
    else:
        print('Error downloading, cannot set gnome wallpaper')
コード例 #10
0
ファイル: views.py プロジェクト: xo0/pythoner.net
def edit(request, wiki_id):
    """Edit an existing wiki article owned by the requesting user."""
    current_page = 'user_wiki'
    title = '修改文章'

    # Reject non-numeric ids.
    try:
        wiki_id = int(wiki_id)
    except ValueError:
        raise Http404()

    # Only the author may edit the entry.
    try:
        wiki = Entry.objects.get(id=wiki_id, author=request.user)
    except Entry.DoesNotExist:
        raise Http404()

    # Handle GET: show the edit form pre-filled with the article.
    if request.method == 'GET':
        form = WikiForm(instance=wiki)
        return render('wiki_add.html',
                      locals(),
                      context_instance=RequestContext(request))

    # Handle POST: validate and save the changes.
    form = WikiForm(request.POST)
    if form.is_valid():
        data = form.cleaned_data
        wiki.title = data['title']
        wiki.content = data['content']
        # `x and x or y` is just `x or y`: fall back to the author's home page
        # when no source was supplied.
        wiki.source = data['source'] or 'http://pythoner.net/home/%d/' % request.user.id
        try:
            wiki.save()
        except Exception as e:  # py2/py3-compatible except syntax
            messages.error(request, '保存文章时出错:%s' % e)
            return HttpResponseRedirect('/home/wiki/')
        else:
            messages.success(request, '修改成功!')

        # Background threads: tag the article and download its images.
        TagingThread(wiki_object=wiki).start()
        ImageDownloader(wiki).start()

        return HttpResponseRedirect('/wiki/%d/' % wiki.id)
    # NOTE(review): an invalid POST falls through and returns None, which
    # Django rejects — confirm whether the form should be re-rendered here.
コード例 #11
0
ファイル: views.py プロジェクト: xo0/pythoner.net
def add(request):
    """Create a new wiki article for the requesting user."""
    current_page = 'user_wiki'
    title = '写新笔记'

    # Handle GET: show an empty form.
    if request.method == 'GET':
        form = WikiForm()
        return render('wiki_add.html',
                      locals(),
                      context_instance=RequestContext(request))

    # Handle POST: validate and persist the new entry.
    form = WikiForm(request.POST)
    if form.is_valid():
        data = form.cleaned_data
        new_wiki = Entry()
        new_wiki.author = request.user
        new_wiki.title = data['title']
        new_wiki.content = data['content']
        # `x and x or y` is just `x or y`: default to the author's home page
        # when no source was supplied.
        new_wiki.source = data['source'] or 'http://pythoner.net/home/%d/' % request.user.id

        try:
            new_wiki.save()
        except Exception as e:  # py2/py3-compatible except syntax
            return HttpResponse('保存文章时出错:%s' % e)
        else:
            # Background thread that tags the article.
            TagingThread(wiki_object=new_wiki).start()
            # Background thread that downloads the article's images.
            ImageDownloader(new_wiki).start()
            # Notify listeners that a new wiki entry was posted.
            new_wiki_was_post.send(sender=new_wiki.__class__, wiki=new_wiki)
            return HttpResponseRedirect('/wiki/%d/' % new_wiki.id)
    # NOTE(review): an invalid POST falls through and returns None — confirm
    # whether the form should be re-rendered with errors instead.
コード例 #12
0
 def test_get_image_returns_image_content(self):
     """get_image yields the file name and the raw response body."""
     name, image = ImageDownloader.get_image('http://domain.com/image.png')
     assert name == 'image.png'
     assert image == self.response.content
コード例 #13
0
from image_downloader import ImageDownloader


if __name__ == '__main__':
    # Source list of image URLs and the folder the images are written to.
    source_file = 'C:/Users/brian.dsouza/Pictures/image_downloader/fileurls.txt'
    output_dir = 'C:/Users/brian.dsouza/Pictures/image_downloader/Images'
    ImageDownloader(file_urls=source_file,
                    destination=output_dir).download_images()
コード例 #14
0
 def test_file_is_saved(self):
     """save() writes the given data under the given file name."""
     file_name, contents = 'image.png', 'image data'
     ImageDownloader.save(file_name, contents)
     self.assert_file_saved(file_name, contents)
コード例 #15
0
 def test_suffix_is_added_to_file_name_if_already_exists(self):
     """Three name collisions -> the file is stored as image-3.png."""
     self.is_file_mock.side_effect = [True] * 3 + [False]
     ImageDownloader.save('image.png', 'image data')
     self.assert_file_saved('image-3.png', 'image data')
コード例 #16
0
 def test_download_images(self):
     """download() fetches each URL and stores the image contents."""
     links = ['http://domain.com/image.png']
     ImageDownloader.download(links)
     self.assert_file_saved('image.png', 'image content data')
コード例 #17
0
import io
import torch
import sys

# --- Configuration constants -------------------------------------------------

# ImageNet class-name mapping used by the classifier.
CLASSES_LOCATION = '../scripts/classifier/imagenet_classes.json'
# Parquet file holding the link rows to process.
PARQUET_FILE_INPUT_LOCATION = "/home/corneliu/flickr.parquet"

# Parquet file the classification results are written to.
PARQUET_FILE_OUTPUT_LOCATION = "/home/corneliu/classification_result.parquet"

# presumably enables GPU inference — TODO confirm where it is consumed
CUDA = True
# presumably toggles running on a SAMPLE_SIZE subset — TODO confirm usage
SHOULD_USE_REDUCED_SAMPLED = False
SAMPLE_SIZE = 50

MAX_LABELS = 5  # presumably max labels kept per image — TODO confirm
NUM_WORKERS = 1
NUM_DOWNLOAD_THREADS = 8
BATCH_SIZE = 250

# Fetch the first 20 unprocessed link rows ("saf" argument's meaning is not
# visible from this chunk — verify against DataHandler).
links_row = DataHandler.get_unprocessed_links(PARQUET_FILE_INPUT_LOCATION,
                                              "saf")[:20]
# download_links = [row.photo_video_download_url for row in links]
# print(download_links)

# Download and preprocess each linked image, then serialize the tensor.
for row in links_row:
    image_id, download_url, image_as_tensor = ImageDownloader.download_and_preprocess_image(
        row)
    # Failed/undecodable downloads come back as None and are skipped.
    if image_as_tensor is None:
        continue

    # Serialize the tensor into an in-memory buffer.
    # NOTE(review): the buffer is never written anywhere in this chunk —
    # confirm whether the serialized bytes are meant to be persisted.
    buffer = io.BytesIO()
    torch.save(image_as_tensor, buffer)
コード例 #18
0
ファイル: crawler.py プロジェクト: arisosoftware/LeseNet
# NOTE(review): this chunk relies on `keyword`, `n_scroll`, `link_save_dir`,
# `image_save_dir` and `SEP` being defined earlier in the file.

# # Search images in bing
# engine = 'bing'
# bing_links_name = 'bing_links.csv'

# bing_ic = ImageCrawler(engine)
# bing_ic.run(keyword, n_scroll)
# bing_ic.save_links(link_save_dir, bing_links_name)

# Search images in google
engine = 'google'
google_links_name = 'google_links.csv'

# Crawl the image links for the keyword and persist them as CSV.
google_ic = ImageCrawler(engine)
google_ic.run(keyword, n_scroll)
google_ic.save_links(link_save_dir, google_links_name)

print("Images' links are saved in: " + link_save_dir + '\n')

# ------------------------------
# Save images by ImageDownloader
# ------------------------------

print(SEP + 'Save images by ImageDownloader\n' + SEP)

# Download every collected link into the image directory.
ider = ImageDownloader(link_save_dir)
ider.run(image_save_dir)

print('Images are saved in: ' + image_save_dir + '\n')
コード例 #19
0
 def test_get_image_returns_none_if_response_code_not_200(self):
     """A non-200 response makes get_image raise DownloadError.

     NOTE(review): the test name says 'returns none' but the assertion
     expects DownloadError — consider renaming the test to match.
     """
     self.response.status_code = 404
     with self.assertRaises(DownloadError):
         ImageDownloader.get_image('http://domain.com/image.png')