Example #1
File: crawl.py  Project: zirkis/LILO
# Imports assumed from the old icrawler 0.1.x API, where the built-in
# crawlers lived in icrawler.examples (not shown in the original excerpt):
from icrawler.examples import FlickrImageCrawler, GoogleImageCrawler

def crawl(type_crawler, search, number_of_results, path_to_save, color_code=5):

    if type_crawler == 'flickr':
        # first argument is a Flickr API key (taken verbatim from the
        # original source); path_to_save is the download directory
        flickr_crawler = FlickrImageCrawler('9b72938db332c2514acce33c793c2f1a',
                                            path_to_save)
        flickr_crawler.crawl(max_num=number_of_results,
                             feeder_thr_num=1,
                             parser_thr_num=1,
                             downloader_thr_num=1,
                             text=search,
                             color=color_code)
    else:
        google_crawler = GoogleImageCrawler(path_to_save, search)
        google_crawler.crawl(keyword=search,
                             offset=0,
                             max_num=number_of_results,
                             date_min=None,
                             date_max=None,
                             feeder_thr_num=1,
                             parser_thr_num=1,
                             downloader_thr_num=4,
                             min_size=(200, 200),
                             max_size=None)
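
For context, a hypothetical invocation of the crawl() helper above; the
search term, result count, and output paths are made up for illustration.

crawl(type_crawler='flickr', search='sunset',
      number_of_results=50, path_to_save='images/flickr_sunset')
# any other value of type_crawler falls through to the Google branch
crawl(type_crawler='google', search='sunset',
      number_of_results=50, path_to_save='images/google_sunset')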
Example #2
import sys
import os

from icrawler.examples import GoogleImageCrawler

kw_file = sys.argv[1]   # text file with one search keyword per line
out_dir = sys.argv[2]   # root directory for downloaded images

# each keyword gets its own output subfolder under out_dir
for line in open(kw_file):
    kw = line.strip()
    print(kw, '...')
    out_fd = os.path.join(out_dir, kw)
    if not os.path.exists(out_fd):
        os.makedirs(out_fd)
    else:
        # empty an existing folder so a rerun starts from scratch
        os.system('rm -rf %s/*' % out_fd)

    google_crawler = GoogleImageCrawler(out_fd)
    google_crawler.crawl(keyword=kw,
                         offset=0,
                         max_num=100,
                         date_min=None,
                         date_max=None,
                         feeder_thr_num=1,
                         parser_thr_num=1,
                         downloader_thr_num=4,
                         min_size=(200, 200),
                         max_size=(800, 800))
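
This script takes two positional arguments, invoked as, e.g., python
crawl_keywords.py keywords.txt downloads (script and directory names
hypothetical): a keyword file with one search term per line, and a root
output directory that receives one subfolder per keyword.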
Example #3
from icrawler.examples import GoogleImageCrawler
from icrawler.examples import BingImageCrawler
from icrawler.examples import BaiduImageCrawler


google_crawler = GoogleImageCrawler('set directory location')
# example (use a raw string so the backslashes are not treated as escapes):
# google_crawler = GoogleImageCrawler(r'C:\Python\image\Google')
google_crawler.crawl(keyword='Bed', offset=0, max_num=500,
                     date_min=None, date_max=None, feeder_thr_num=1,
                     parser_thr_num=1, downloader_thr_num=1,
                     min_size=(500, 500), max_size=None)
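
All of these snippets target the old icrawler 0.1.x API, where the built-in
crawlers lived in icrawler.examples and thread counts were passed to crawl().
In icrawler 0.2+ the classes moved to icrawler.builtin, and threads and
storage became constructor arguments; a rough modern equivalent of the
snippet above (directory path illustrative, keyword names worth checking
against your installed version) would be:

from icrawler.builtin import GoogleImageCrawler

google_crawler = GoogleImageCrawler(
    feeder_threads=1, parser_threads=1, downloader_threads=1,
    storage={'root_dir': r'C:\Python\image\Google'})  # illustrative path
google_crawler.crawl(keyword='Bed', max_num=500,
                     min_size=(500, 500), max_size=None)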
Example #4
File: test.py  Project: tinyloop/UTKFace
import logging  # imports assumed; not shown in the original excerpt
from datetime import date
from icrawler.examples import GoogleImageCrawler

def test_google(keyword, folder, count):
    folder = 'google/' + folder
    google_crawler = GoogleImageCrawler(folder, log_level=logging.INFO)
    # positional args: keyword, offset, max_num, date_min, date_max,
    # feeder_thr_num, parser_thr_num, downloader_thr_num
    google_crawler.crawl(keyword, 0, count, date(2000, 1, 1), date(2017, 1, 5),
                         1, 1, 4)
Example #5
File: my_test.py  Project: ddmng/icrawler
from icrawler.examples import GoogleImageCrawler
from icrawler.examples import BingImageCrawler
#from icrawler.examples import BaiduImageCrawler

# max number of images to download from each source
num = 500
# max number of threads to use for each source
num_threads = 8

google_crawler = GoogleImageCrawler('data/google')
google_crawler.crawl(keyword='isis flag',
                     offset=0,
                     max_num=num,
                     date_min=None,
                     date_max=None,
                     feeder_thr_num=1,
                     parser_thr_num=1,
                     downloader_thr_num=num_threads,
                     min_size=(200, 200),
                     max_size=None)

bing_crawler = BingImageCrawler('data/bing')
bing_crawler.crawl(keyword='isis flag',
                   offset=0,
                   max_num=num,
                   feeder_thr_num=1,
                   parser_thr_num=1,
                   downloader_thr_num=num_threads,
                   min_size=None,
                   max_size=None)
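
Note that the Bing call above passes no date_min/date_max: as the snippet
suggests, this API version's Bing crawler takes no date-range arguments,
unlike the Google crawler (which is simply given None for both here).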
Example #6
import os
from icrawler.examples import GoogleImageCrawler

_QUERY = "photos of normal faces -drawing -art -cartoon -funny"
_DIR = "training_data/regular"

if not os.path.exists(_DIR):
    os.makedirs(_DIR)

google_crawler = GoogleImageCrawler(_DIR)
google_crawler.crawl(keyword=_QUERY,
                     offset=0,
                     max_num=1000,
                     date_min=None,
                     date_max=None,
                     feeder_thr_num=1,
                     parser_thr_num=1,
                     downloader_thr_num=4,
                     min_size=(200, 200),
                     max_size=None)
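
The _QUERY string relies on Google's minus operator: terms such as -drawing,
-art, and -cartoon exclude matching results from the search, so the crawler
only sees photos that Google does not associate with those words.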
Example #7
File: test.py  Project: Lightjohn/icrawler
import logging  # imports assumed; not shown in the original excerpt
from datetime import date
from icrawler.examples import GoogleImageCrawler

def test_google():
    google_crawler = GoogleImageCrawler('images/google', log_level=logging.INFO)
    google_crawler.crawl('cloudy', 0, 10, date(2016, 2, 1),
                         date(2016, 3, 15), 1, 1, 4)
Example #8
import os
from icrawler.examples import GoogleImageCrawler

# `label` is assumed to be defined earlier in the original script
directory = "/tf_files/photos" + "/" + label

# TensorFlow walks every folder under tf_files; if it finds a folder
# (or subfolder) whose name matches a label and that contains pictures,
# it trains on those pictures with the folder name as the label.

# First, check whether a folder with the given name already exists, i.e.
# whether the network has already been trained on that label. If not,
# create the directory, fill it with crawled pictures, and start the
# training process.
if not os.path.exists(directory):
    # the label does not exist yet, so create a folder for it
    os.mkdir(directory)
    # crawl Google Images for pictures matching the label
    google_crawler = GoogleImageCrawler(directory)
    google_crawler.crawl(keyword=label,
                         offset=0,
                         max_num=40,
                         date_min=None,
                         date_max=None,
                         feeder_thr_num=1,
                         parser_thr_num=1,
                         downloader_thr_num=4,
                         min_size=(200, 200),
                         max_size=None)
    # delete every picture in the directory that is not a JPEG: check its
    # extension and run the `file` command on it, looking for the words
    # "JPEG image data" in the output to confirm it is not a corrupted
    # JPEG file
    for root, dirs, files in os.walk(directory):
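        # --- reconstructed sketch, not part of the original excerpt ---
        # the excerpt cuts off here; the comments above describe the intent:
        # run the `file` command on each download and delete anything whose
        # output lacks "JPEG image data" (non-JPEGs and corrupted JPEGs)
        import subprocess  # local import keeps the sketch self-contained
        for name in files:
            path = os.path.join(root, name)
            file_info = subprocess.check_output(['file', path]).decode()
            if 'JPEG image data' not in file_info:
                os.remove(path)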