from icrawler.examples import FlickrImageCrawler
from icrawler.examples import GoogleImageCrawler


def crawl(type_crawler, search, number_of_results, path_to_save, color_code=5):
    if type_crawler == 'flickr':
        flickr_crawler = FlickrImageCrawler('9b72938db332c2514acce33c793c2f1a',
                                            path_to_save)
        flickr_crawler.crawl(max_num=number_of_results, feeder_thr_num=1,
                             parser_thr_num=1, downloader_thr_num=1,
                             text=search, color=color_code)
    else:
        # the crawler constructor takes only the storage directory
        google_crawler = GoogleImageCrawler(path_to_save)
        google_crawler.crawl(keyword=search, offset=0,
                             max_num=number_of_results, date_min=None,
                             date_max=None, feeder_thr_num=1, parser_thr_num=1,
                             downloader_thr_num=4, min_size=(200, 200),
                             max_size=None)

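# A hypothetical invocation of the helper above; the queries, counts, and
# output paths are illustrative only.
crawl('flickr', 'sunset', 50, 'images/flickr', color_code=3)
crawl('google', 'sunset', 50, 'images/google')
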
import sys
import os

from icrawler.examples import GoogleImageCrawler

kw_file = sys.argv[1]
out_dir = sys.argv[2]

for line in open(kw_file):
    kw = line.strip()
    print(kw, '...')
    out_fd = out_dir + '/' + kw
    if not os.path.exists(out_fd):
        os.makedirs(out_fd)
    else:
        os.system('rm -rf %s/*' % (out_fd))
    google_crawler = GoogleImageCrawler(out_fd)
    google_crawler.crawl(keyword=kw, offset=0, max_num=100, date_min=None,
                         date_max=None, feeder_thr_num=1, parser_thr_num=1,
                         downloader_thr_num=4, min_size=(200, 200),
                         max_size=(800, 800))

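# Hypothetical invocation of the script above (the script name is
# illustrative); the first argument is a file with one keyword per line,
# the second the output root directory:
#   python crawl_keywords.py keywords.txt downloads
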
from icrawler.examples import GoogleImageCrawler
from icrawler.examples import BingImageCrawler
from icrawler.examples import BaiduImageCrawler

google_crawler = GoogleImageCrawler('set directory location')
# example (raw string so the backslashes are not treated as escapes):
# google_crawler = GoogleImageCrawler(r'C:\Python\image\Google')
google_crawler.crawl(keyword='Bed', offset=0, max_num=500, date_min=None,
                     date_max=None, feeder_thr_num=1, parser_thr_num=1,
                     downloader_thr_num=1, min_size=(500, 500), max_size=None)

import logging
from datetime import date

from icrawler.examples import GoogleImageCrawler


def test_google(keyword, folder, count):
    folder = 'google/' + folder
    google_crawler = GoogleImageCrawler(folder, log_level=logging.INFO)
    google_crawler.crawl(keyword, 0, count, date(2000, 1, 1),
                         date(2017, 1, 5), 1, 1, 4)

from icrawler.examples import GoogleImageCrawler
from icrawler.examples import BingImageCrawler
# from icrawler.examples import BaiduImageCrawler

# max number of images to download from each source
num = 500
# max number of threads to use for each source
num_threads = 8

google_crawler = GoogleImageCrawler('data/google')
google_crawler.crawl(keyword='isis flag', offset=0, max_num=num,
                     date_min=None, date_max=None, feeder_thr_num=1,
                     parser_thr_num=1, downloader_thr_num=num_threads,
                     min_size=(200, 200), max_size=None)

bing_crawler = BingImageCrawler('data/bing')
bing_crawler.crawl(keyword='isis flag', offset=0, max_num=num,
                   feeder_thr_num=1, parser_thr_num=1,
                   downloader_thr_num=num_threads, min_size=None,
                   max_size=None)

import os

from icrawler.examples import GoogleImageCrawler

_QUERY = "photos of normal faces -drawing -art -cartoon -funny"
_DIR = "training_data/regular"

if not os.path.exists(_DIR):
    os.makedirs(_DIR)

google_crawler = GoogleImageCrawler(_DIR)
google_crawler.crawl(keyword=_QUERY, offset=0, max_num=1000, date_min=None,
                     date_max=None, feeder_thr_num=1, parser_thr_num=1,
                     downloader_thr_num=4, min_size=(200, 200), max_size=None)

import logging
from datetime import date

from icrawler.examples import GoogleImageCrawler


def test_google():
    google_crawler = GoogleImageCrawler('images/google',
                                        log_level=logging.INFO)
    google_crawler.crawl('cloudy', 0, 10, date(2016, 2, 1),
                         date(2016, 3, 15), 1, 1, 4)

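# The same call written with keyword arguments for readability, assuming
# the parameter order used by the other snippets in this section
# (keyword, offset, max_num, date_min, date_max, feeder_thr_num,
# parser_thr_num, downloader_thr_num):
def test_google_explicit():
    google_crawler = GoogleImageCrawler('images/google',
                                        log_level=logging.INFO)
    google_crawler.crawl(keyword='cloudy', offset=0, max_num=10,
                         date_min=date(2016, 2, 1),
                         date_max=date(2016, 3, 15),
                         feeder_thr_num=1, parser_thr_num=1,
                         downloader_thr_num=4)
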
directory = "/tf_files/photos" + "/" + label #tensorFlow parse all the folder in the tf_files #if by any change it finds a folder or a subfolder with a name #and picture instead of it. It will train on those pictures #with a label as the name of the folder #first thing is we chech whether a folder exists with the given #name so to know whether our Neural Networks are already trained #on that label if not we make a directory with all crawled picture #inside of it and start the training process if not os.path.exists(directory): #the label does not exists so we create one os.mkdir(directory) #crawling from google image on X label google_crawler = GoogleImageCrawler(directory) google_crawler.crawl(keyword=label, offset=0, max_num=40, date_min=None, date_max=None, feeder_thr_num=1, parser_thr_num=1, downloader_thr_num=4, min_size=(200, 200), max_size=None) #delete all picture found in the directory that are not JPEG #by checking there extention and by running the file command #and parse whether there is the world JPEG image data in it #to confirm that is not a corrupted JPEG file for root, dirs, files in os.walk(directory):