def exe_crawl(arg):
    google_crawler = GoogleImageCrawler(
        downloader_cls=PrefixNameGoogleDownloader,
        feeder_threads=1,
        parser_threads=1,
        downloader_threads=4,
        storage={'root_dir': f'{arg.dict}/{arg.keyword}/google'})
    filters = dict(license=f'{arg.license}')
    google_crawler.crawl(keyword=f'{arg.keyword}', filters=filters,
                         offset=0, max_num=arg.max, file_idx_offset=0)

    bing_crawler = BingImageCrawler(
        downloader_cls=PrefixNameBingDownloader,
        downloader_threads=4,
        storage={'root_dir': f'{arg.dict}/{arg.keyword}/bing'})
    bing_crawler.crawl(keyword=f'{arg.keyword}', filters=filters,
                       offset=0, max_num=arg.max)

    baidu_crawler = BaiduImageCrawler(
        downloader_cls=PrefixNameBaiduDownloader,
        storage={'root_dir': f'{arg.dict}/{arg.keyword}/baidu'})
    baidu_crawler.crawl(keyword=f'{arg.keyword}', offset=0, max_num=arg.max)
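# Usage sketch for exe_crawl. Assumption: `arg` only needs the attributes the
# function reads (dict, keyword, license, max); the argparse flags below are
# hypothetical, not taken from the original code.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--dict', default='downloads', help='output root directory')
parser.add_argument('--keyword', required=True)
parser.add_argument('--license', default='commercial')
parser.add_argument('--max', type=int, default=100)
exe_crawl(parser.parse_args())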
def crawl_book(book, tag, add):
    num = book.split('.')[0]
    title = tag[num]['title']
    entities = json.load(open(os.path.join(ENTITY_DIR, book), 'r'))
    crawled = set()
    for cpt in entities:
        for entity in entities[cpt]['cpt_key'][:2]:
            entity = entity[0]
            outpath = os.path.join(IMAGE_DIR, num, entity)
            # Crawl when the entity is new and has no image folder yet, or
            # when `add` is set and the existing folder is empty.
            if (entity not in crawled and not os.path.exists(outpath)) \
                    or (add and len(os.listdir(outpath)) == 0):
                title = title.replace('"', '').replace("'", '')
                keyword = f"illustration {entity} {title}"
                # Earlier approach, kept for reference: shell out to bbid.py
                # with `-s "illustration {entity} {title}"` (or
                # `-s "{entity} in {title}"` when `add` is set) and `--limit 4`.
                bing_crawler = BingImageCrawler(storage={'root_dir': outpath})
                bing_crawler.crawl(keyword=keyword, max_num=2)
                crawled.add(entity)  # avoid re-crawling the same entity
    print(f'==book {num}, \'{title}\' finished.==')
def pring():
    # Build the new folder path from the two entry widgets.
    new_path = os.path.join(folder_path.get(), folder_name.get())
    if not os.path.exists(new_path):
        os.mkdir(new_path)
        crawler = BingImageCrawler(storage={"root_dir": new_path})
        if combo_1.get() != "None":
            filters = dict(size=combo_1.get())
        else:
            filters = None
        crawler.crawl(keyword=picture_name.get(), filters=filters,
                      offset=0, max_num=int(picture_num.get()))
        res = messagebox.askokcancel('Finished!',
                                     'Reset input, but check folder?')
        folder_name.delete(0, "end")
        picture_name.delete(0, "end")
        picture_num.delete(0, "end")
        if res:
            tkinter.filedialog.askopenfilename(initialdir=new_path)
    else:
        messagebox.showinfo(
            'Failed',
            'A folder with the same name already exists\n' + new_path)
def getImg(keywords='', dirpath='', amount=0, source=4):
    if source == 1:
        print('\n--- Starting download from Google Images ---\n')
        google_crawler = GoogleImageCrawler(parser_threads=2,
                                            downloader_threads=4,
                                            storage={'root_dir': dirpath})
        google_crawler.crawl(keyword=keywords, offset=0, max_num=amount,
                             date_min=None, date_max=None,
                             min_size=(200, 200), max_size=None)
    elif source == 2:
        print('\n--- Starting download from Microsoft Bing ---\n')
        bing_crawler = BingImageCrawler(downloader_threads=4,
                                        storage={'root_dir': dirpath})
        bing_crawler.crawl(keyword=keywords, offset=0, max_num=amount,
                           min_size=None, max_size=None)
    elif source == 3:
        print('\n--- Starting download from Baidu ---\n')
        baidu_crawler = BaiduImageCrawler(storage={'root_dir': dirpath})
        baidu_crawler.crawl(keyword=keywords, offset=0, max_num=amount,
                            min_size=None, max_size=None)
    else:
        # Default: crawl all three sources in turn.
        print('\n--- Starting download from Google Images ---\n')
        google_crawler = GoogleImageCrawler(parser_threads=2,
                                            downloader_threads=4,
                                            storage={'root_dir': dirpath})
        google_crawler.crawl(keyword=keywords, offset=0, max_num=amount,
                             date_min=None, date_max=None,
                             min_size=(200, 200), max_size=None)
        print('\n--- Starting download from Microsoft Bing ---\n')
        bing_crawler = BingImageCrawler(downloader_threads=4,
                                        storage={'root_dir': dirpath})
        bing_crawler.crawl(keyword=keywords, offset=0, max_num=amount,
                           min_size=None, max_size=None)
        print('\n--- Starting download from Baidu ---\n')
        baidu_crawler = BaiduImageCrawler(storage={'root_dir': dirpath})
        baidu_crawler.crawl(keyword=keywords, offset=0, max_num=amount,
                            min_size=None, max_size=None)
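# Usage example for getImg: source=2 selects Bing only; any value other than
# 1, 2, or 3 (including the default 4) crawls all three engines.
getImg(keywords='cat', dirpath='./cats', amount=30, source=2)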
def getImagesFromBing(self, query):
    bing_crawler = BingImageCrawler(downloader_threads=4)
    bing_crawler.crawl(keyword=query, offset=0, max_num=self.num_of_images,
                       min_size=(self.min_width, self.min_height),
                       max_size=None)
def main():
    parser = argparse.ArgumentParser(
        description="Simple tool to download images via Bing image search.")
    parser.add_argument("-t", "--test", action='store_true')
    parser.add_argument("-p", "--path", type=str, default=os.getcwd(),
                        help="Path to output images")
    parser.add_argument("-n", "--num", type=int, default=100,
                        help="How many images to download per keyword.")
    parser.add_argument("--threads", type=int, default=4,
                        help="Number of downloader threads.")
    parser.add_argument("-o", "--offset", type=int, default=0, help="Offset.")
    parser.add_argument("--min", type=int, default=None,
                        help="Minimum edge length of the images, in pixels.")
    parser.add_argument("--max", type=int, default=None,
                        help="Maximum edge length of the images, in pixels.")
    args = parser.parse_args()

    # icrawler expects (width, height) tuples for size limits.
    min_size = (args.min, args.min) if args.min else None
    max_size = (args.max, args.max) if args.max else None

    t0 = time.time()
    check_dir(args.path)
    for kwd in search_for:
        subdir = os.path.join(args.path, kwd)
        check_dir(subdir)
        print(" Item name = ", kwd)
        if len(search_for[kwd]) == 0:
            # Thread counts belong in the crawler constructor, and storage
            # takes a dict, not a bare path.
            bing_crawler = BingImageCrawler(feeder_threads=1,
                                            parser_threads=1,
                                            downloader_threads=args.threads,
                                            storage={'root_dir': subdir})
            bing_crawler.crawl(keyword=kwd, offset=args.offset,
                               max_num=args.num,
                               min_size=min_size, max_size=max_size)
        else:
            for j in range(0, len(search_for[kwd])):
                print(" : %s" % search_for[kwd][j])
                ssubdir = os.path.join(subdir, search_for[kwd][j])
                check_dir(ssubdir)
                pure_keyword = '%20' + search_for[kwd][j]
                pure_keyword = kwd + pure_keyword.replace(' ', '%20')
                bing_crawler = BingImageCrawler(storage={'root_dir': ssubdir})
                bing_crawler.crawl(keyword=pure_keyword, offset=args.offset,
                                   max_num=args.num,
                                   min_size=min_size, max_size=max_size)
def crawl(folder: str,
          search: str,
          maxnum: int,
          crawlers: List[str] = ['GOOGLE', 'BING', 'BAIDU']) -> Dict[str, str]:
    """Crawl web sites for images."""
    print('(1) Crawling ...')
    # Prepare output folder.
    os.makedirs(folder, exist_ok=True)
    if maxnum > 1000:
        print("Max num limited to 1000")
        maxnum = 1000

    for c in crawlers:
        print(f' -> {c}')
        if c == 'GOOGLE':
            google_crawler = GoogleImageCrawler(
                downloader_cls=CustomDownloader,
                log_level=logging.CRITICAL,
                feeder_threads=1,
                parser_threads=1,
                downloader_threads=4,
                storage={'root_dir': folder})
            google_crawler.crawl(keyword=search, offset=0, max_num=maxnum,
                                 min_size=(200, 200), max_size=None,
                                 file_idx_offset=0)
        if c == 'BING':
            bing_crawler = BingImageCrawler(downloader_cls=CustomDownloader,
                                            log_level=logging.CRITICAL,
                                            downloader_threads=4,
                                            storage={'root_dir': folder})
            bing_crawler.crawl(keyword=search, filters=None, offset=0,
                               max_num=maxnum, file_idx_offset='auto')
        if c == 'BAIDU':
            baidu_crawler = BaiduImageCrawler(downloader_cls=CustomDownloader,
                                              log_level=logging.CRITICAL,
                                              storage={'root_dir': folder})
            baidu_crawler.crawl(keyword=search, offset=0, max_num=maxnum,
                                min_size=(200, 200), max_size=None,
                                file_idx_offset='auto')

    # Map downloaded file names to their source URLs, as recorded by
    # CustomDownloader.
    return {k: v for k, v in CustomDownloader.registry.items()
            if k is not None}
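# Usage sketch (assumption: CustomDownloader is a downloader subclass that
# records {filename: source_url} pairs in a class-level `registry` dict, as
# implied by the return statement above).
name_to_url = crawl('downloads/cats', 'cat', maxnum=50, crawlers=['BING'])
for filename, url in name_to_url.items():
    print(filename, '<-', url)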
def crawl_it(name_to_search):
    # Despite the 'images/google' output folder, this crawler queries Bing.
    bing_crawler = BingImageCrawler(feeder_threads=1,
                                    parser_threads=2,
                                    downloader_threads=4,
                                    downloader_cls=MyImageDownloader,
                                    storage={'root_dir': 'images/google'})
    bing_crawler.crawl(keyword=name_to_search, max_num=20, file_idx_offset=0)
def test_bing(logo):
    bing_crawler = BingImageCrawler(
        downloader_cls=MyImageDownloader,
        downloader_threads=4,
        storage={'root_dir': os.path.join(root, logo, 'bing')},
        log_level=logging.INFO,
        # `filename` is not a stock icrawler constructor argument; presumably
        # it is consumed by MyImageDownloader.
        filename=os.path.join(root, logo, 'bing.txt'))
    bing_crawler.crawl(logo, max_num=args.maxnum)
def search(names):
    names = list(set(names))  # de-duplicate
    for name in names:
        profile_dirpath = os.path.join(PHOTO_DIRPATH, name)
        os.makedirs(profile_dirpath, exist_ok=True)
        crawler = BingImageCrawler(storage={"root_dir": profile_dirpath})
        crawler.crawl(keyword=name, max_num=100)
        time.sleep(WAITING_TIME_SEARCH)
def download_data(dataset_name, classes, num_images):
    for c in classes:
        bing_crawler = BingImageCrawler(
            downloader_threads=6,
            storage={'root_dir': f'../static/datasets/{dataset_name}/{c}'})
        # Request twice the target count, presumably to compensate for
        # failed downloads.
        bing_crawler.crawl(keyword=c, filters=None, offset=0,
                           max_num=num_images * 2)
def fetchAndCropFace(keyword, max_num):
    input_file_path = f"./training_data/original/{keyword}/"
    output_file_path = f"./training_data/cropped_face/{keyword}/"

    echo("crawl images")
    os.makedirs(input_file_path, exist_ok=True)
    crawler = BingImageCrawler(storage={"root_dir": input_file_path})
    # crawler = GoogleImageCrawler(storage={"root_dir": input_file_path})
    crawler.crawl(keyword=keyword, max_num=int(max_num))
    echo("original images in: " + input_file_path)

    echo("cropped face images in: " + output_file_path)
    os.makedirs(output_file_path, exist_ok=True)

    input_files = os.listdir(input_file_path)
    echo("fetched images count: " + str(len(input_files)))

    cascade = cv2.CascadeClassifier("./haarcascade_frontalface_alt.xml")
    windowName = 'window'
    cv2.namedWindow(windowName, cv2.WINDOW_KEEPRATIO | cv2.WINDOW_NORMAL)
    cv2.resizeWindow(windowName, 500, 500)

    for input_file in input_files:
        input_image = cv2.imread(input_file_path + input_file)
        if input_image is None:
            continue
        height, width, _ = input_image.shape
        cv2.imshow(windowName, input_image)
        cv2.waitKey(50)

        echo("detect face")
        face_rects = cascade.detectMultiScale(input_image, scaleFactor=1.1,
                                              minNeighbors=10,
                                              minSize=(10, 10))
        if len(face_rects) == 0:
            echo("detect face failure")
            continue
        echo("detect face success")

        # Keep only the first detected face.
        face_rect = face_rects[0]
        # Crop just the face region and save it.
        output_image = crop(input_image, face_rect)
        cv2.imwrite(output_file_path + input_file, output_image)
        # Draw a rectangle around the face region and display it.
        marked_input_image = drawRect(input_image, face_rect, (0, 255, 0))
        cv2.imshow(windowName, marked_input_image)
        cv2.waitKey(50)

    cv2.destroyAllWindows()
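# fetchAndCropFace relies on crop() and drawRect() helpers that are not shown
# in the snippet. A minimal sketch of what they might look like (hypothetical,
# assuming OpenCV's (x, y, w, h) rectangle convention):
def crop(image, rect):
    x, y, w, h = rect
    return image[y:y + h, x:x + w]

def drawRect(image, rect, color):
    x, y, w, h = rect
    return cv2.rectangle(image.copy(), (x, y), (x + w, y + h), color, 2)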
def test_bing():
    img_dir = osp.join(test_dir, 'bing2')
    bing_crawler = BingImageCrawler(downloader_threads=2,
                                    storage={'root_dir': img_dir},
                                    log_level=logging.INFO)
    search_filters = dict(type='photo', license='commercial')
    bing_crawler.crawl('manga face color', max_num=1000,
                       filters=search_filters)
def download_images(keyword):
    crawler = BingImageCrawler(parser_threads=5,
                               downloader_threads=5,
                               storage={
                                   'backend': 'FileSystem',
                                   'root_dir': 'images'
                               })
    crawler.crawl(keyword=keyword, max_num=max_images,
                  filters={'size': 'medium'})
def crawl_bing(folder: str, search: str, maxnum: int, num_threads: int):
    bing_crawler = BingImageCrawler(downloader_cls=CustomDownloader,
                                    log_level=logging.CRITICAL,
                                    downloader_threads=num_threads,
                                    storage={'root_dir': folder})
    bing_crawler.crawl(keyword=search, filters=None, offset=0,
                       max_num=maxnum, file_idx_offset='auto')
def get_images(keyword):
    bing_crawler = BingImageCrawler(downloader_cls=CustomLinkPrinter)
    # Reset the URL list collected by the downloader before crawling.
    bing_crawler.downloader.file_urls = []
    bing_crawler.crawl(keyword=keyword, max_num=50)
    file_urls = bing_crawler.downloader.file_urls
    print(file_urls)
    make_hash_df(file_urls, keyword)
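# get_images assumes a CustomLinkPrinter downloader that records image URLs
# instead of downloading files. A minimal sketch of such a class, assuming
# icrawler's ImageDownloader.download(task, ...) hook; the original
# implementation is not shown in the snippet:
from icrawler import ImageDownloader

class CustomLinkPrinter(ImageDownloader):
    file_urls = []

    def download(self, task, default_ext, timeout=5, max_retry=3, **kwargs):
        # Record the URL and mark the task done without fetching the file.
        self.file_urls.append(task['file_url'])
        task['success'] = True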
def doBing(self):
    for keyword in self.keywords:
        bing_storage = {'root_dir': '%s/bing-%s/' % (self.dest_dir, keyword)}
        # Skip keywords that already have a download directory.
        if os.path.exists(bing_storage['root_dir']):
            continue
        bing_crawler = BingImageCrawler(
            parser_threads=self.thread_parser,
            downloader_threads=self.thread_downloader,
            storage=bing_storage)
        # Bing serves far fewer results than this in practice; the large
        # max_num effectively means "take everything available".
        bing_crawler.crawl(keyword=keyword, max_num=100000)
def test_bing():
    print('start testing BingImageCrawler')
    bing_crawler = BingImageCrawler(downloader_threads=2,
                                    storage={'root_dir': 'images/bing'},
                                    log_level=logging.INFO)
    search_filters = dict(type='photo', license='commercial', layout='wide',
                          size='large', date='pastmonth')
    bing_crawler.crawl('fatcat', max_num=10, filters=search_filters)
def crawl_images(keyword: str, max_num_images: int, save_dir: str,
                 feeder_threads: int, parser_threads: int,
                 downloader_threads: int):
    crawler = BingImageCrawler(
        feeder_threads=feeder_threads,
        parser_threads=parser_threads,
        downloader_threads=downloader_threads,
        log_level=logging.ERROR,
        storage={"root_dir": save_dir},
    )
    crawler.crawl(keyword=keyword, max_num=max_num_images)
def test_bing():
    img_dir = osp.join(test_dir, 'bing')
    bing_crawler = BingImageCrawler(downloader_threads=2,
                                    storage={'root_dir': img_dir},
                                    log_level=logging.INFO)
    search_filters = dict(type='photo', license='commercial', layout='wide',
                          size='large', date='pastmonth')
    bing_crawler.crawl('cat', max_num=5, filters=search_filters)
    shutil.rmtree(img_dir)
def download(keyword):
    fd = dict(color="white", size="medium", layout="tall")
    bing_crawler = BingImageCrawler(parser_threads=5,
                                    downloader_threads=5,
                                    storage={'root_dir': 'imgs'})
    # Disables TLS certificate verification for all requests made by this
    # crawler; only do this if you understand the security implications.
    bing_crawler.session.verify = False
    bing_crawler.crawl(keyword=keyword, max_num=10, min_size=(10, 10),
                       max_size=None, filters=fd)
def main():
    argv = sys.argv
    if len(argv) < 4:
        usage()
        exit(1)
    if not os.path.isdir(argv[1]):
        os.makedirs(argv[1])
    crawler = BingImageCrawler(storage={"root_dir": argv[1]})
    crawler.crawl(keyword=argv[2], max_num=int(argv[3]))
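# Example invocation (hypothetical script name):
#   python bing_crawl.py ./out "shiba inu" 25
# downloads up to 25 Bing results for "shiba inu" into ./out.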
def main(city):
    bing_crawler = BingImageCrawler(
        downloader_threads=2,
        # Use the city name as the directory name.
        storage={'root_dir': 'images/' + city},
        log_level=logging.INFO)
    search_filters = dict(type='photo', license='commercial', layout='wide',
                          size='large', date='pastmonth')
    # Apply the Bing search filters defined above.
    bing_crawler.crawl(city, max_num=20, filters=search_filters)
def download_images(celebs_file, save_dir):
    celebrities_list = []
    with open(celebs_file) as f:
        for line in f:
            celebrities_list.append(line.strip())

    for celeb in celebrities_list:
        images_dir = os.path.join(save_dir, celeb.lower().replace(" ", "_"))
        os.makedirs(images_dir, exist_ok=True)
        bing_crawler = BingImageCrawler(downloader_threads=4,
                                        storage={'root_dir': images_dir})
        bing_crawler.crawl(keyword=celeb, filters=None, offset=0,
                           max_num=1000)
def test_bing():
    print('start testing BingImageCrawler')
    bing_crawler = BingImageCrawler(downloader_threads=2,
                                    storage={'root_dir': 'images/bing'},
                                    log_level=logging.INFO)
    search_filters = dict(type='photo', license='commercial', layout='wide',
                          size='large', date='pastmonth')
    bing_crawler.crawl('cat', max_num=10, filters=search_filters)
def test_bing(dir, keyword):
    keyword = keyword.replace(': flickr.com', '')
    print('Starting Bing crawler:', keyword)
    bing_crawler = BingImageCrawler(
        # parser_threads=16,
        downloader_cls=Base64NameDownloader,
        downloader_threads=16,
        storage={'root_dir': dir},
        log_level=logging.DEBUG)
    bing_crawler.crawl(keyword=keyword, offset=0, max_num=1000,
                       min_size=None, max_size=None)
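# Base64NameDownloader is not shown in the snippet. A plausible sketch, which
# names each file after the base64 encoding of its URL path so repeated crawls
# produce stable, collision-free file names. It overrides icrawler's
# ImageDownloader.get_filename hook; the details are assumptions:
import base64
from urllib.parse import urlparse
from icrawler import ImageDownloader

class Base64NameDownloader(ImageDownloader):
    def get_filename(self, task, default_ext):
        url_path = urlparse(task['file_url']).path
        extension = url_path.rsplit('.', 1)[-1] if '.' in url_path else default_ext
        encoded = base64.urlsafe_b64encode(url_path.encode('utf-8')).decode('ascii')
        return f'{encoded}.{extension}'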
def crawl_auto(search_word, get_num, dir_name):
    print("Started crawling Google.")
    # Google
    googleCrawler = GoogleImageCrawler(storage={"root_dir": f'{dir_name}/google'})
    googleCrawler.crawl(keyword=search_word, max_num=get_num)

    print("Started crawling Baidu.")
    # Baidu
    baiduCrawler = BaiduImageCrawler(storage={"root_dir": f'{dir_name}/baidu'})
    baiduCrawler.crawl(keyword=search_word, max_num=get_num)

    print("Started crawling Bing.")
    # Bing
    bingCrawler = BingImageCrawler(storage={"root_dir": f'{dir_name}/bing'})
    bingCrawler.crawl(keyword=search_word, max_num=get_num)
def download_image(file_directory, search_query):
    # Web-scraper function; Google did not work here, so Bing is used instead.
    print(file_directory)
    bing_crawler = BingImageCrawler(parser_threads=2,
                                    downloader_threads=4,
                                    storage={'root_dir': file_directory})
    picture_count = 1
    for items in search_query:
        print(f"Downloading a new image of {items}")
        # max_num grows by one per keyword. This appears to rely on icrawler
        # skipping files that already exist, so each crawl adds exactly one
        # new image to the shared folder.
        bing_crawler.crawl(keyword=items, filters=None, max_num=picture_count)
        picture_count += 1
def Crawl_Image(key_word, raw_folder=RAW_FOLDER):
    google_crawler = GoogleImageCrawler(
        feeder_threads=1,
        parser_threads=1,
        downloader_threads=6,
        storage={'root_dir': raw_folder + key_word})
    google_crawler.crawl(keyword=key_word, offset=0, max_num=1000,
                         min_size=None, max_size=None, file_idx_offset=0)

    bing_crawler = BingImageCrawler(downloader_threads=6,
                                    storage={'root_dir': raw_folder + key_word})
    bing_crawler.crawl(keyword=key_word, filters={'type': 'photo'},
                       offset=0, max_num=1000)

    baidu_crawler = BaiduImageCrawler(storage={'root_dir': raw_folder + key_word})
    baidu_crawler.crawl(keyword=key_word, offset=0, max_num=1000,
                        min_size=None, max_size=None)
def image_crawler(baidu_path, bing_path, number_of_image, image_key_words):
    baidu_storage = {'root_dir': baidu_path}
    bing_storage = {'root_dir': bing_path}
    baidu_crawler = BaiduImageCrawler(parser_threads=8,
                                      downloader_threads=8,
                                      storage=baidu_storage)
    bing_crawler = BingImageCrawler(parser_threads=8,
                                    downloader_threads=8,
                                    storage=bing_storage)
    baidu_crawler.crawl(keyword=image_key_words, max_num=number_of_image,
                        min_size=(200, 200))
    bing_crawler.crawl(keyword=image_key_words, max_num=number_of_image,
                       min_size=(200, 200))
def download(key='cat', max_num=100, storage_dir=None,
             store_to_array={'convert': False, 'return': False},
             flag=0, array_name=None, pad=(28, 28), slash='\\'):
    if storage_dir is None:
        raise ValueError("storage_dir must not be None")
    if not isinstance(max_num, int):
        raise ValueError(f'max_num must be an int, got {type(max_num)}')
    if max_num <= 0:
        raise ValueError("max_num must be greater than 0")
    if not isinstance(key, str):
        raise ValueError(f'key must be a str, got {type(key)}')

    crawler = BingImageCrawler(storage={'root_dir': storage_dir})
    crawler.crawl(keyword=key, max_num=max_num)

    if store_to_array['convert']:
        array = convert_to_matrix(storage_dir, flag, pad, slash)
        if array_name is not None:
            save(array, array_name)
        if store_to_array['return']:
            return np.array(array)
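# Usage sketch for download(); convert_to_matrix() and save() are helpers
# from the surrounding project and are not defined here.
images = download(key='dog', max_num=25, storage_dir='dogs',
                  store_to_array={'convert': True, 'return': True},
                  pad=(28, 28), slash='/')
print(images.shape)  # e.g. (N, 28, 28), depending on convert_to_matrix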