Beispiel #1
0
def load_flickr25k_dataset(tag='sky', path="data", n_threads=50, printable=False):
    """Load images from the Flickr25k dataset, optionally filtered by a tag.

    The dataset is stored under ``<path>/flickr25k``. When the extracted
    ``mirflickr`` folder is not found there, the archive is fetched from
    `the official website <http://press.liacs.nl/mirflickr/mirdownload.html>`__
    and the downloaded zip file is removed afterwards.

    Parameters
    ------------
    tag : str or None
        Which images to return.
            - A string such as 'dog' or 'red' selects the images whose tag file lists that tag, see `Flickr Search <https://www.flickr.com/search/>`__.
            - ``None`` selects every image.

    path : str
        Parent folder of the dataset; the data itself lives in ``<path>/flickr25k/``. Default is ``data``.
    n_threads : int
        Number of threads passed to the image reader.
    printable : boolean
        Whether the image reader prints information while reading, default is ``False``.

    Returns
    --------
    The result of ``visualize.read_images`` for the selected image files.

    Examples
    -----------
    Get images with tag of sky

    >>> images = tl.files.load_flickr25k_dataset(tag='sky')

    Get all images

    >>> images = tl.files.load_flickr25k_dataset(tag=None, n_threads=100, printable=True)

    """
    path = os.path.join(path, 'flickr25k')
    archive_name = 'mirflickr25k.zip'
    base_url = 'http://press.liacs.nl/mirflickr/mirflickr25k/'

    # Fetch and unpack the archive only when the extracted folder is absent.
    if folder_exists(os.path.join(path, "mirflickr")) is False:
        logging.info("[*] Flickr25k is nonexistent in {}".format(path))
        maybe_download_and_extract(archive_name, path, base_url, extract=True)
        del_file(os.path.join(path, archive_name))

    # Image file names, in natural (numeric-aware) order.
    image_dir = os.path.join(path, "mirflickr")
    image_names = load_file_list(path=image_dir, regx='\\.jpg', printable=False)
    image_names.sort(key=natural_keys)

    # Tag file names, sorted the same way so that position i in both lists
    # is treated as belonging to the same picture.
    tag_dir = os.path.join(path, "mirflickr", "meta", "tags")
    tag_names = load_file_list(path=tag_dir, regx='\\.txt', printable=False)
    tag_names.sort(key=natural_keys)

    if tag is not None:
        logging.info("[Flickr25k] reading images with tag: {}".format(tag))
    else:
        logging.info("[Flickr25k] reading all images")

    # Keep the image at each position whose tag file lists the requested tag.
    selected = []
    for position, tag_name in enumerate(tag_names):
        file_tags = read_file(os.path.join(tag_dir, tag_name)).split('\n')
        if tag is not None and tag not in file_tags:
            continue
        selected.append(image_names[position])

    return visualize.read_images(selected, image_dir, n_threads=n_threads, printable=printable)
Beispiel #2
0
 def load_image_from_folder(path):
     """Read every file in ``path`` whose name matches ``\\.jpg``.

     The matching file names are collected with ``load_file_list`` and handed
     to ``visualize.read_images`` (10 threads, no progress output); that
     call's result is returned unchanged.
     """
     jpg_names = load_file_list(path=path, regx='\\.jpg', printable=False)
     loaded = visualize.read_images(jpg_names, path=path, n_threads=10, printable=False)
     return loaded
 def load_image_from_folder(path):
     """Load the ``.jpg`` images found in the folder ``path``.

     File names are listed via ``load_file_list`` and then read by
     ``visualize.read_images`` using 10 threads with printing disabled.
     """
     reader_options = dict(path=path, n_threads=10, printable=False)
     file_names = load_file_list(path=path, regx='\\.jpg', printable=False)
     return visualize.read_images(file_names, **reader_options)
def load_flickr1M_dataset(tag='sky', size=10, path="data", n_threads=50, printable=False):
    """Load Flickr1M dataset.

    Returns the images that carry a given tag (or all images) from the
    Flickr1M dataset. Missing image archives and the tag archive are
    downloaded from `the official website <http://press.liacs.nl/mirflickr/mirdownload.html>`__
    the first time they are needed.

    Parameters
    ------------
    tag : str or None
        What images to return.
            - If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search <https://www.flickr.com/search/>`__.
            - If you want to get all images, set to ``None``.

    size : int
        Integer between 1 and 10. 1 means 100k images ... 5 means 500k images, 10 means all 1 million images. Default is 10.
    path : str
        Parent folder of the dataset; the data itself is stored in ``<path>/flickr1M/``. Default is ``data``.
    n_threads : int
        The number of threads used to read images.
    printable : boolean
        Whether to print information when reading images, default is ``False``.

    Returns
    --------
    The result of ``visualize.read_images`` for the selected image paths.

    Examples
    ----------
    Use 200k images

    >>> images = tl.files.load_flickr1M_dataset(tag='zebra', size=2)

    Use 1 Million images

    >>> images = tl.files.load_flickr1M_dataset(tag='zebra')

    """
    import shutil

    path = os.path.join(path, 'flickr1M')
    logging.info("[Flickr1M] using {}% of images = {}".format(size * 10, size * 100000))
    images_zip = [
        'images0.zip', 'images1.zip', 'images2.zip', 'images3.zip', 'images4.zip', 'images5.zip', 'images6.zip',
        'images7.zip', 'images8.zip', 'images9.zip'
    ]
    tag_zip = 'tags.zip'
    url = 'http://press.liacs.nl/mirflickr/mirflickr1m/'

    # download dataset: one archive per 100k images, only the first `size` are needed
    for image_zip in images_zip[0:size]:
        image_folder = image_zip.split(".")[0]
        if folder_exists(os.path.join(path, image_folder)) is False:
            logging.info("[Flickr1M] {} is missing in {}".format(image_folder, path))
            maybe_download_and_extract(image_zip, path, url, extract=True)
            del_file(os.path.join(path, image_zip))
            # The extracted folder is named 'images'; rename it after its
            # archive so the next archive does not collide with it.
            shutil.move(os.path.join(path, 'images'), os.path.join(path, image_folder))
        else:
            logging.info("[Flickr1M] {} exists in {}".format(image_folder, path))

    # download tag
    if folder_exists(os.path.join(path, "tags")) is False:
        logging.info("[Flickr1M] tag files is nonexistent in {}".format(path))
        maybe_download_and_extract(tag_zip, path, url, extract=True)
        del_file(os.path.join(path, tag_zip))
    else:
        logging.info("[Flickr1M] tags exists in {}".format(path))

    # 1. image path list
    images_list = []
    images_folder_list = []
    for i in range(0, size):
        images_folder_list += load_folder_list(path=os.path.join(path, 'images%d' % i))
    # Sub-folders are named by number; os.path.basename keeps the sort
    # independent of the platform's path separator (same as for the tags below).
    images_folder_list.sort(key=lambda s: int(os.path.basename(s)))

    for folder in images_folder_list[0:size * 10]:
        tmp = load_file_list(path=folder, regx='\\.jpg', printable=False)
        tmp.sort(key=lambda s: int(s.split('.')[-2]))  # ddd.jpg
        images_list.extend([os.path.join(folder, x) for x in tmp])

    # 2. tag path list
    tag_list = []
    tag_folder_list = load_folder_list(os.path.join(path, "tags"))
    tag_folder_list.sort(key=lambda s: int(os.path.basename(s)))

    for folder in tag_folder_list[0:size * 10]:
        tmp = load_file_list(path=folder, regx='\\.txt', printable=False)
        tmp.sort(key=lambda s: int(s.split('.')[-2]))  # ddd.txt
        tag_list += [os.path.join(folder, s) for s in tmp]

    # 3. select images
    # images_list and tag_list are sorted the same way, so position idx in
    # both is treated as the same picture.
    if tag is None:
        logging.info("[Flickr1M] selecting all images")
    else:
        logging.info("[Flickr1M] searching tag: {}".format(tag))
    select_images_list = []
    for idx, tag_file in enumerate(tag_list):
        # tag=None means "all images" (as documented); the tag file is then
        # not read at all.
        if tag is None or tag in read_file(tag_file).split('\n'):
            select_images_list.append(images_list[idx])

    if tag is None:
        logging.info("[Flickr1M] reading all images")
    else:
        logging.info("[Flickr1M] reading images with tag: {}".format(tag))
    images = visualize.read_images(select_images_list, '', n_threads=n_threads, printable=printable)
    return images
def load_flickr25k_dataset(tag='sky', path="data", n_threads=50, printable=False):
    """Return Flickr25k images, either all of them or only those with a given tag.

    Data is kept in ``<path>/flickr25k``. If the ``mirflickr`` folder is not
    present there, the archive is obtained from
    `the official website <http://press.liacs.nl/mirflickr/mirdownload.html>`__
    and the zip file is deleted after extraction.

    Parameters
    ------------
    tag : str or None
        Selects the images to return.
            - Pass a string like 'dog' or 'red' to keep only images whose tag file contains it, see `Flickr Search <https://www.flickr.com/search/>`__.
            - Pass ``None`` to keep all images.

    path : str
        Folder under which ``flickr25k/`` is created or looked up. Default is ``data``.
    n_threads : int
        Number of threads used for reading the images.
    printable : boolean
        Whether information is printed while reading images, default is ``False``.

    Returns
    --------
    Whatever ``visualize.read_images`` returns for the selected files.

    Examples
    -----------
    Get images with tag of sky

    >>> images = tl.files.load_flickr25k_dataset(tag='sky')

    Get all images

    >>> images = tl.files.load_flickr25k_dataset(tag=None, n_threads=100, printable=True)

    """
    path = os.path.join(path, 'flickr25k')
    zip_name = 'mirflickr25k.zip'
    source_url = 'http://press.liacs.nl/mirflickr/mirflickr25k/'

    # Download step: skipped when the extracted folder is already there.
    if folder_exists(os.path.join(path, "mirflickr")) is False:
        logging.info("[*] Flickr25k is nonexistent in {}".format(path))
        maybe_download_and_extract(zip_name, path, source_url, extract=True)
        del_file(os.path.join(path, zip_name))

    # Step 1: naturally sorted image file names.
    imgs_root = os.path.join(path, "mirflickr")
    img_files = load_file_list(path=imgs_root, regx='\\.jpg', printable=False)
    img_files.sort(key=natural_keys)

    # Step 2: naturally sorted tag file names (one per image, matched by position).
    tags_root = os.path.join(path, "mirflickr", "meta", "tags")
    tag_files = load_file_list(path=tags_root, regx='\\.txt', printable=False)
    tag_files.sort(key=natural_keys)

    # Step 3: pick the images to read.
    take_all = tag is None
    if take_all:
        logging.info("[Flickr25k] reading all images")
    else:
        logging.info("[Flickr25k] reading images with tag: {}".format(tag))

    chosen = []
    for i, tag_file in enumerate(tag_files):
        listed_tags = read_file(os.path.join(tags_root, tag_file)).split('\n')
        if take_all or tag in listed_tags:
            chosen.append(img_files[i])

    return visualize.read_images(chosen, imgs_root, n_threads=n_threads, printable=printable)