Ejemplo n.º 1
0
def downloadImages():
    """Download and extract the TensorFlow flower_photos dataset, then list its classes.

    Returns
    -------
    tuple[list[str], str]
        Alphabetically sorted class (sub-directory) names and the dataset
        directory path.
    """
    filename = 'flower_photos.tgz'
    dir_path = filename.split('.')[0]

    # Download only if neither the archive nor the extracted directory exists.
    if not (os.path.exists(filename) or os.path.exists(dir_path)):
        # FIX: the original URL had a leading space, which makes urlretrieve
        # fail with "unknown url type: ' https'".
        url = 'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz'
        urllib.request.urlretrieve(url, filename)

    if not os.path.exists(dir_path):
        # Context manager guarantees the archive handle is closed
        # (the original leaked it).
        with tarfile.open(filename) as archive:
            archive.extractall()

    # Collect the names of the class sub-directories.
    classes = []
    if os.path.exists(dir_path):
        for class_name in os.listdir(dir_path):
            class_path = os.path.join(dir_path, class_name)
            # Only directories are classes; skip stray files (e.g. LICENSE.txt).
            if os.path.isdir(class_path):
                no_of_images = len(os.listdir(class_path))
                print("Found {} images of {}".format(no_of_images, class_name))
                classes.append(class_name)

        # Sort alphabetically for a deterministic ordering.
        classes.sort()
        print(classes)

    return classes, dir_path
Ejemplo n.º 2
0
def untar_data(path):
    """Extract every .zip, .gz (tar.gz) and .tar archive found directly in *path*.

    Archives are extracted into *path* itself; afterwards every entry of
    *path* is printed.

    Parameters
    ----------
    path : str
        Directory to scan for archives.
    """
    for entry in os.scandir(path):
        # FIX: use the entry's full path. The original concatenated
        # `path + file.name` (broken without a trailing separator) and opened
        # archives by bare name relative to the CWD instead of `path`.
        full = entry.path
        if entry.name.endswith('.zip'):
            print(os.stat(full).st_size, entry.name, "this is zip file")
            with zipfile.ZipFile(full, 'r') as zf:
                zf.extractall(path)
        elif entry.name.endswith('.gz'):
            # NOTE(review): a special case may be needed for .bin payloads.
            print(os.stat(full).st_size, entry.name, "this is gz file")
            # FIX: extract into `path`; the original extracted into the CWD.
            with tarfile.open(full, "r") as archive:
                archive.extractall(path)
        elif entry.name.endswith('.tar'):
            print(os.stat(full).st_size, entry.name, "this is tar file")
            with tarfile.open(full, "r") as archive:
                archive.extractall(path)

    # Show the directory content after extraction.
    for entry in os.scandir(path):
        print(entry.name)
Ejemplo n.º 3
0
def download_and_extract(data_dir, download_dir):
    """Download each archive in the module-level ``urls`` list and extract it.

    Files already present in *download_dir* are skipped.

    Parameters
    ----------
    data_dir : str
        Directory the archives are extracted into.
    download_dir : str
        Directory the archives are downloaded to.
    """
    for url in urls:
        target_file = url.split('/')[-1]
        if target_file not in os.listdir(download_dir):
            print('Downloading', url)
            archive_path = os.path.join(download_dir, target_file)
            urllib.request.urlretrieve(url, archive_path)
            # FIX: open the archive where it was saved; the original opened
            # the bare filename, which only worked when CWD == download_dir.
            # The context manager also closes the handle (original leaked it).
            with tarfile.open(archive_path) as archive:
                archive.extractall(data_dir)
        else:
            print('Already downloaded', url)
Ejemplo n.º 4
0
def class_20_to_10(y_aux):
    """Map CIFAR-100 coarse labels (20 superclasses) onto 10 merged classes.

    Downloads and unpacks the CIFAR-100 archive on first use so the dataset
    is available on disk for callers.

    Parameters
    ----------
    y_aux : np.ndarray
        Label array; the coarse label of sample ``i`` is read from
        ``y_aux[i, 0]``.

    Returns
    -------
    np.ndarray
        Array of the same shape and dtype as *y_aux* holding the remapped labels.
    """
    # Fetch the dataset once; later calls find the extracted directory.
    if not os.path.exists('cifar-100-python'):
        url = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
        wget.download(url)
        # Context manager closes the archive (the original leaked the handle).
        with tarfile.open("cifar-100-python.tar.gz") as archive:
            archive.extractall()
        os.remove("cifar-100-python.tar.gz")

    # FIX: the original unpickled 'cifar-100-python/test' into `dict_try`,
    # which was never used (and its helper shadowed the builtin `dict`);
    # that dead file read has been removed.

    # Coarse (20) -> reduced (10) class lookup table.
    class_10 = {
        0: 1, 1: 2, 2: 5, 3: 6, 4: 5,
        5: 6, 6: 6, 7: 3, 8: 0, 9: 7,
        10: 8, 11: 0, 12: 1, 13: 3, 14: 4,
        15: 0, 16: 2, 17: 5, 18: 9, 19: 9,
    }

    y_pri = np.zeros_like(y_aux)
    for i in range(len(y_aux)):
        y_pri[i] = class_10[y_aux[i, 0]]
    return y_pri
Ejemplo n.º 5
0
    def save_response_content(response, destination):
        """Stream *response* to *destination*, extract it as a tar archive
        into the CWD, then delete the archive file.

        Parameters
        ----------
        response : object
            HTTP response exposing ``iter_content(chunk_size)``
            (e.g. a requests.Response).
        destination : str
            Path the archive is written to (removed after extraction).
        """
        CHUNK_SIZE = 32768

        with open(destination, "wb") as f:
            print("WRITING")
            for chunk in response.iter_content(CHUNK_SIZE):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)

        print("UNZIPPING...")
        import tarfile
        # FIX: context manager closes the archive; the original leaked the
        # TarFile handle.
        with tarfile.open(destination) as archive:
            archive.list()
            archive.extractall()
        print("UNZIPPED...")

        print("DELETING TAR FILE...")
        import os
        os.remove(destination)
        print("DELETED TAR FILE...")
Ejemplo n.º 6
0
def maybe_download(filename, work_directory, source_url):
    """Download *filename* from *source_url* into *work_directory* and extract it.

    If *work_directory* already exists, the whole step is assumed done.

    Parameters
    ----------
    filename : str
        Name to store the downloaded archive under.
    work_directory : str
        Directory the archive is downloaded to and extracted in.
    source_url : str
        URL to fetch the archive from.

    Returns
    -------
    str
        Path of the downloaded archive. NOTE: the archive is deleted after
        extraction, so the returned path may no longer exist on disk.
    """
    filepath = os.path.join(work_directory, filename)
    if gfile.Exists(work_directory):
        # FIX: the original returned None here while every other path
        # returned `filepath`; callers now always get the same value back.
        return filepath

    gfile.MakeDirs(work_directory)

    if not gfile.Exists(filepath):
        print('Downloading', filename)
        # urlretrieve without a target downloads to a temp file; copy it in.
        temp_file_name, _ = urllib.request.urlretrieve(source_url)
        gfile.Copy(temp_file_name, filepath)
        with gfile.GFile(filepath) as f:
            size = f.size()
        print('Successfully downloaded', filename, size, 'bytes.')

        print("Extracting: " + filepath)
        base_path = os.path.dirname(filepath)
        with tarfile.open(filepath, "r:gz") as archive:
            archive.extractall(base_path)
        # The archive is no longer needed once extracted.
        os.remove(filepath)
    return filepath
Ejemplo n.º 7
0
import cv2
# scikit-image package
import skimage
import numpy as np
import random

from sklearn.model_selection import train_test_split

# Deterministic runs: seed both TensorFlow and NumPy.
tf.enable_eager_execution()
tf.set_random_seed(0)
np.random.seed(0)

# unzip tar file
# NOTE(review): rebinding `tf` here shadows the TensorFlow module used on the
# lines just above with a TarFile object. Any later code expecting `tf` to be
# TensorFlow will break after this point — consider renaming this variable.
import tarfile
tf = tarfile.open("drive/My Drive/Colab Notebooks/notMNIST_large.tar")
tf.extractall()
tf.close()

# Directory produced by the extraction above.
train_dir = "notMNIST_large/"

# Accumulators for image data and labels (filled later in the script).
imgs = []
labels = []

# Images are presumably 28x28 pixels — TODO confirm downstream usage.
imageSize = 28

# load data and labels
def get_data(folder):
    imgs = []
    labels = []
    for folderName in os.listdir(folder):
Ejemplo n.º 8
0
from keras import Input, Model
from keras.applications import InceptionResNetV2
from keras.callbacks import TensorBoard
from keras.layers import Conv2D, Flatten, Dense, BatchNormalization, Reshape, concatenate, LeakyReLU, Lambda, Activation, UpSampling2D, Dropout
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras_preprocessing import image
from scipy.io import loadmat

# In[]: Nomor 1
# Mount Google Drive so the dataset archive is reachable from Colab.
from google.colab import drive
drive.mount('/content/drive')

# Extract the "wiki_crop" face dataset next to the archive on Drive.
# NOTE(review): the TarFile handle `tf` is never closed, and the name `tf`
# would shadow TensorFlow if a later cell imports it under that alias.
import tarfile
tf = tarfile.open("/content/drive/My Drive/Chapter 9 AI/wiki_crop.tar")
tf.extractall(path="/content/drive/My Drive/Chapter 9 AI")

# In[]: Nomor 2
def load_data(wiki_dir, dataset='wiki'):
    # Load the wiki.mat file
    meta = loadmat(os.path.join(wiki_dir, "{}.mat".format(dataset)))

    # Load the list of all files
    full_path = meta[dataset][0, 0]["full_path"][0]

    # List of Matlab serial date numbers
    dob = meta[dataset][0, 0]["dob"][0]

    # List of years when photo was taken
    photo_taken = meta[dataset][0, 0]["photo_taken"][0]  # year
Ejemplo n.º 9
0
def maybe_download_and_extract(url_source,
                               download_directory,
                               filename=None,
                               extract=True,
                               expected_bytes=None):
    """
    Check if the file exists in download_directory, otherwise try to download and extract it.

    Parameters
    ----------
    url_source : str
        The URL to download the file from.
    download_directory : str
        A folder path to search for the file in and download the file to.
    filename : str
        The name of the (to be) downloaded file.
    extract : boolean
        If True, tries to uncompress the downloaded file, default is True.
        Supported formats are ".tar.gz/.tar.bz2" or ".zip" files.
        If different format, extraction is skipped.
    expected_bytes : int or None
        If set, tries to verify that the downloaded file is of the specified
        size, otherwise raises an Exception; default is None, which
        corresponds to no check.

    Returns
    -------
    str
        File path of the downloaded (uncompressed) file.

    Examples
    --------
    >>> res = maybe_download_and_extract(
    ...     url_source='http://yann.lecun.com/exdb/mnist/',
    ...     download_directory='data/')

    """
    # Create a download directory if not already existing.
    if not os.path.exists(download_directory):
        os.makedirs(download_directory)

    if filename is None:
        # Derive the filename from the last URL path segment; the
        # extension-less name is the (possible) extraction target directory.
        filename_with_extension = url_source.split('/')[-1]
        filename = filename_with_extension.split('.')[0]
    else:
        filename_with_extension = filename

    filepath = os.path.join(download_directory, filename)
    # Download only when the (extracted) target does not already exist.
    if not os.path.exists(filepath):

        def _progress(count, block_size, total_size):
            # Console progress indicator for urlretrieve.
            percentage = float(count * block_size) / float(total_size) * 100.0
            sys.stdout.write(f'\r>> Downloading (unknown) {percentage:.1f}%')
            sys.stdout.flush()

        # The downloaded file must end with a correct extension so the
        # format sniffing below works.
        filepath_with_extension = os.path.join(download_directory,
                                               filename_with_extension)
        urllib.request.urlretrieve(url_source, filepath_with_extension,
                                   _progress)

        statinfo = os.stat(filepath_with_extension)
        print(f'Succesfully downloaded (unknown) {statinfo.st_size} bytes.')
        # Optional size check against the caller-supplied expectation.
        if expected_bytes is not None and expected_bytes != statinfo.st_size:
            raise Exception(
                'Failed to verify (unknown). Can you get to it with a browser?'
            )
        if extract:
            try:
                print(f'Trying to extract archive {filepath_with_extension}')
                if tarfile.is_tarfile(filepath_with_extension):
                    with tarfile.open(filepath_with_extension, 'r') as tf:
                        # Archives with a single top-level directory go
                        # straight into download_directory; flat archives get
                        # their own sub-directory named after the file.
                        if archive_has_toplevel_dir(tf.getnames()):
                            tf.extractall(download_directory)
                        else:
                            tf.extractall(
                                os.path.join(download_directory, filename))
                    # Remove the downloaded file if we extracted it.
                    os.remove(filepath_with_extension)
                elif zipfile.is_zipfile(filepath_with_extension):
                    with zipfile.ZipFile(filepath_with_extension) as zf:
                        if archive_has_toplevel_dir(zf.namelist()):
                            zf.extractall(download_directory)
                        else:
                            zf.extractall(
                                os.path.join(download_directory, filename))
                    # Remove the downloaded file if we extracted it.
                    os.remove(filepath_with_extension)
                else:
                    print('Skipping file extraction as format not supported')
                    # The file has not been extracted, so we keep the extension.
                    filepath = filepath_with_extension
            except Exception:
                # Remove the partial download, then re-raise with the original
                # traceback (the original `raise (e)` restarted it here).
                os.remove(filepath_with_extension)
                raise

    return filepath