Code Example #1
    def download(self):
        if self._check_exists():
            return

        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")

        os.makedirs(self.raw_folder, exist_ok=True)
        os.makedirs(self.processed_folder, exist_ok=True)

        def save_record(records, record_id, tt, vals, mask, labels):
            tt = torch.tensor(tt).to(self.device)

            vals = torch.stack(vals)
            mask = torch.stack(mask)
            labels = torch.stack(labels)

            # flatten the measurements for different tags
            vals = vals.reshape(vals.size(0), -1)
            mask = mask.reshape(mask.size(0), -1)
            assert (len(tt) == vals.size(0))
            assert (mask.size(0) == vals.size(0))
            assert (labels.size(0) == vals.size(0))

            #records.append((record_id, tt, vals, mask, labels))

            seq_length = len(tt)
            # split the long time series into smaller ones
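            # (consecutive windows overlap by max_seq_length // 2, each window's
            # timestamps are re-based to start at zero, and sequences shorter
            # than max_seq_length yield no records at all)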
            offset = 0
            slide = self.max_seq_length // 2

            while (offset + self.max_seq_length < seq_length):
                idx = range(offset, offset + self.max_seq_length)

                first_tp = tt[idx][0]
                records.append((record_id, tt[idx] - first_tp, vals[idx],
                                mask[idx], labels[idx]))
                offset += slide

        for url in self.urls:
            filename = url.rpartition('/')[2]
            download_url(url, self.raw_folder, filename, None)

            print('Processing {}...'.format(filename))

            dirname = self.raw_folder
            records = []
            first_tp = None

            for txtfile in os.listdir(dirname):
                with open(os.path.join(dirname, txtfile)) as f:
                    lines = f.readlines()
                    prev_time = -1
                    tt = []

                    record_id = None
                    for l in lines:
                        cur_record_id, tag_id, time, date, val1, val2, val3, label = l.strip(
                        ).split(',')
                        value_vec = torch.Tensor((float(val1), float(val2),
                                                  float(val3))).to(self.device)
                        time = float(time)

                        if cur_record_id != record_id:
                            if record_id is not None:
                                save_record(records, record_id, tt, vals, mask,
                                            labels)
                            tt, vals, mask, nobs, labels = [], [], [], [], []
                            record_id = cur_record_id

                            tt = [torch.zeros(1).to(self.device)]
                            vals = [
                                torch.zeros(len(self.tag_ids),
                                            3).to(self.device)
                            ]
                            mask = [
                                torch.zeros(len(self.tag_ids),
                                            3).to(self.device)
                            ]
                            nobs = [
                                torch.zeros(len(self.tag_ids)).to(self.device)
                            ]
                            labels = [
                                torch.zeros(len(self.label_names)).to(
                                    self.device)
                            ]

                            first_tp = time
                            time = round((time - first_tp) / 10**5)
                            prev_time = time
                        else:
                            # for speed -- we actually don't need to quantize it in Latent ODE
                            time = round(
                                (time - first_tp) / 10**5
                            )  # quantizing by 100 ms. 10,000 is one millisecond, 10,000,000 is one second

                        if time != prev_time:
                            tt.append(time)
                            vals.append(
                                torch.zeros(len(self.tag_ids),
                                            3).to(self.device))
                            mask.append(
                                torch.zeros(len(self.tag_ids),
                                            3).to(self.device))
                            nobs.append(
                                torch.zeros(len(self.tag_ids)).to(self.device))
                            labels.append(
                                torch.zeros(len(self.label_names)).to(
                                    self.device))
                            prev_time = time

                        if tag_id in self.tag_ids:
                            n_observations = nobs[-1][self.tag_dict[tag_id]]
                            if (self.reduce
                                    == 'average') and (n_observations > 0):
                                prev_val = vals[-1][self.tag_dict[tag_id]]
                                new_val = (prev_val * n_observations +
                                           value_vec) / (n_observations + 1)
                                vals[-1][self.tag_dict[tag_id]] = new_val
                            else:
                                vals[-1][self.tag_dict[tag_id]] = value_vec

                            mask[-1][self.tag_dict[tag_id]] = 1
                            nobs[-1][self.tag_dict[tag_id]] += 1

                            if label in self.label_names:
                                if torch.sum(labels[-1][
                                        self.label_dict[label]]) == 0:
                                    labels[-1][self.label_dict[label]] = 1
                        else:
                            assert tag_id == 'RecordID', 'Read unexpected tag id {}'.format(
                                tag_id)
                    save_record(records, record_id, tt, vals, mask, labels)

            torch.save(records, os.path.join(self.processed_folder, 'data.pt'))

        print('Done!')
Code Example #2
    def __init__(self,
                 root,
                 image_set='train',
                 mode='segmentation',
                 download=False,
                 transforms=None,
                 mean=[0.485, 0.456, 0.406],
                 std=[0.229, 0.224, 0.225],
                 size_img=(520, 520),
                 size_crop=(480, 480),
                 scale_factor=(0.5, 1.2),
                 p=0.5,
                 p_rotate=0.25,
                 rotate=False,
                 scale=True):

        try:
            from scipy.io import loadmat
            self._loadmat = loadmat
        except ImportError:
            raise RuntimeError(
                "Scipy is not found. This dataset needs to have scipy installed: "
                "pip install scipy")

        super(SBDataset, self).__init__(root, transforms)
        ## Transform
        self.mean = mean
        self.std = std
        self.size_img = size_img
        self.size_crop = size_crop
        self.scale_factor = scale_factor
        self.p = p
        self.p_rotate = p_rotate
        self.rotate = rotate
        self.scale = scale
        self.train = image_set == 'train' or image_set == 'train_noval'
        ##
        self.image_set = verify_str_arg(image_set, "image_set",
                                        ("train", "val", "train_noval"))
        self.mode = verify_str_arg(mode, "mode",
                                   ("segmentation", "boundaries"))
        self.num_classes = 20

        sbd_root = self.root
        image_dir = os.path.join(sbd_root, 'img')
        mask_dir = os.path.join(sbd_root, 'cls')

        if download:
            download_extract(self.url, self.root, self.filename, self.md5)
            extracted_ds_root = os.path.join(self.root, "benchmark_RELEASE",
                                             "dataset")
            for f in ["cls", "img", "inst", "train.txt", "val.txt"]:
                old_path = os.path.join(extracted_ds_root, f)
                shutil.move(old_path, sbd_root)
            download_url(self.voc_train_url, sbd_root, self.voc_split_filename,
                         self.voc_split_md5)

        if not os.path.isdir(sbd_root):
            raise RuntimeError('Dataset not found or corrupted.' +
                               ' You can use download=True to download it')

        split_f = os.path.join(sbd_root, image_set.rstrip('\n') + '.txt')

        with open(split_f, "r") as f:
            file_names = [x.strip() for x in f.readlines()]

        self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
        self.masks = [os.path.join(mask_dir, x + ".mat") for x in file_names]
        assert (len(self.images) == len(self.masks))

        self._get_target = self._get_segmentation_target \
            if self.mode == "segmentation" else self._get_boundaries_target
Code Example #3
File: german.py Project: amf272/nnlib
 def download(self):
     if not os.path.exists(self.base_dir):
         os.makedirs(self.base_dir)
     download_url(
         "https://raw.githubusercontent.com/human-analysis/MaxEnt-ARL/master/data/german/german.data-numeric",
         self.base_dir)
Code Example #4
 def download(self):
     if not os.path.exists(self.base_dir):
         os.makedirs(self.base_dir)
     for fname, url in self.relevant_files.items():
         download_url(url, self.base_dir, fname)
Code Example #5
#     - video:
#         - H-264
#         - MPEG-4 AVC (part 10) (avc1)
#         - fps: 29.97
#     - audio:
#         - MPEG AAC audio (mp4a)
#         - sample rate: 48K Hz
#

import torch
import torchvision
from torchvision.datasets.utils import download_url

# Download the sample video
download_url(
    "https://github.com/pytorch/vision/blob/main/test/assets/videos/WUzgd7C1pWA.mp4?raw=true",
    ".", "WUzgd7C1pWA.mp4")
video_path = "./WUzgd7C1pWA.mp4"

######################################
# Streams are defined in a similar fashion as torch devices. We encode them as strings in a form
# of ``stream_type:stream_id`` where ``stream_type`` is a string and ``stream_id`` a long int.
# The constructor accepts passing a ``stream_type`` only, in which case the stream is auto-discovered.
# Firstly, let's get the metadata for our particular video:

stream = "video"
video = torchvision.io.VideoReader(video_path, stream)
video.get_metadata()
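
######################################
# A stream can also be addressed explicitly by id. As a sketch (assuming the
# first video stream has id 0, consistent with the metadata above), one could
# construct the reader as:

video = torchvision.io.VideoReader(video_path, "video:0")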

######################################
# Here we can see that the video has two streams: a video stream and an audio stream.
Code Example #6
 def download_dataset(self):
     download_url(self.url, self.root, filename=self.filename, md5=self.md5)
     with zipfile.ZipFile(os.path.join(self.root, self.filename),
                          'r') as zip_ref:
         zip_ref.extractall(self.root,
                            members=c_f.extract_progress(zip_ref))
Code Example #7
File: market.py Project: nhendy/re-id
 def _download(self):
     download_and_extract_archive(self.dataset_url, self.root)
     download_url(self.attributes_url,
                  os.path.join(self.root, self.dataset_dir_name))
Code Example #8
File: sun.py Project: bighuang624/AGAM
    def download(self):
        import tarfile
        import shutil

        if self._check_integrity():
            return

        # download attributes
        attributes_filename = os.path.basename(self.attributes_url)
        download_url(self.attributes_url,
                     self.root,
                     filename=attributes_filename)

        attributes_tgz_filename = os.path.join(self.root, attributes_filename)
        with tarfile.open(attributes_tgz_filename, 'r') as f:
            f.extractall(self.root)

        if os.path.isfile(attributes_tgz_filename):
            os.remove(attributes_tgz_filename)

        attributes_original_dir = os.path.join(
            self.root,
            attributes_filename.split('.')[0])
        attributes_final_dir = os.path.join(self.root, self.attribute_dir)
        os.rename(attributes_original_dir, attributes_final_dir)

        # download images
        images_filename = os.path.basename(self.images_url)
        download_url(self.images_url, self.root, filename=images_filename)

        images_tgz_filename = os.path.join(self.root, images_filename)
        with tarfile.open(images_tgz_filename, 'r') as f:
            f.extractall(self.root)

        if os.path.isfile(images_tgz_filename):
            os.remove(images_tgz_filename)

        images_original_dir = os.path.join(self.root, 'images')
        images_final_dir = os.path.join(self.root, self.image_dir)

        for dir_name in os.listdir(images_original_dir):
            if dir_name in ['misc', 'outliers']:
                continue
            cur_dir = os.path.join(images_original_dir, dir_name)
            for child_dir_name in os.listdir(cur_dir):
                cur_child_dir = os.path.join(cur_dir, child_dir_name)
                first_child_name = os.listdir(cur_child_dir)[0]
                if os.path.isdir(os.path.join(
                        cur_child_dir,
                        first_child_name)):  # cur_child_dir contains sub-directories
                    for child_name in os.listdir(cur_child_dir):
                        source_dir = os.path.join(cur_child_dir, child_name)
                        target_dir = os.path.join(
                            images_final_dir,
                            '{}_{}'.format(child_dir_name, child_name))
                        if not os.path.exists(target_dir):
                            shutil.copytree(source_dir, target_dir)
                else:  # cur_child_dir only contains images
                    target_dir = os.path.join(images_final_dir, child_dir_name)
                    if not os.path.exists(target_dir):
                        shutil.copytree(cur_child_dir, target_dir)

        if os.path.exists(images_original_dir):
            shutil.rmtree(images_original_dir)

        # delete dirs that have only 1 image
        for dir_name in [
                'barbershop', 'distillery', 'ice_cream_parlor',
                'police_station', 'roller_skating_rink_indoor',
                'volleyball_court_indoor'
        ]:
            delete_dir = os.path.join(images_final_dir, dir_name)
            if os.path.exists(delete_dir):
                shutil.rmtree(delete_dir)
Code Example #9
    def download(self):
        """Download the n-MNIST data if it doesn't exist in processed_folder already."""
        from six.moves import urllib
        import tarfile

        if self._check_exists():
            return

        # download files
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
            os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        if not self._check_mats_exists():
            for _, url in self.urls.items():
                filename = url.rpartition('/')[2]
                file_path = os.path.join(self.root, self.raw_folder, filename)
                if not self._check_gzips_exists():
                    download_url(url,
                                 root=os.path.join(self.root, self.raw_folder),
                                 filename=filename,
                                 md5=None)
                # extract the .mat member, then remove the downloaded archive
                mat_name = os.path.basename(file_path.replace('.gz', '.mat'))
                with tarfile.open(file_path, 'r:gz') as tar, \
                        open(file_path.replace('.gz', '.mat'), 'wb') as out_f:
                    out_f.write(tar.extractfile(mat_name).read())
                os.unlink(file_path)

        # process and save as torch files
        print('Processing...')

        def read_images(mat_data, split):
            length = mat_data[split].shape[0]
            num_rows = np.uint8(np.sqrt(mat_data[split].shape[1]))
            num_cols = num_rows
            return torch.from_numpy(mat_data[split]).view(
                length, num_rows, num_cols)

        def read_labels(mat_data, split):
            length = mat_data[split].shape[0]
            labels = np.asarray(
                [np.where(r == 1)[0][0] for r in mat_data[split]])
            return torch.from_numpy(labels).view(length).long()

        data = sio.loadmat(
            os.path.join(self.root, self.raw_folder, self.mat_files[0]))
        if len(self.mat_files) > 1:
            for mat_file in self.mat_files[1:]:
                mat_data = sio.loadmat(
                    os.path.join(self.root, self.raw_folder, mat_file))
                data['train_x'] = np.concatenate(
                    (data['train_x'], mat_data['train_x']), axis=0)
                data['train_y'] = np.concatenate(
                    (data['train_y'], mat_data['train_y']), axis=0)
                data['test_x'] = np.concatenate(
                    (data['test_x'], mat_data['test_x']), axis=0)
                data['test_y'] = np.concatenate(
                    (data['test_y'], mat_data['test_y']), axis=0)

        training_set = (read_images(data,
                                    'train_x'), read_labels(data, 'train_y'))
        test_set = (read_images(data, 'test_x'), read_labels(data, 'test_y'))
        with open(
                os.path.join(self.root, self.processed_folder,
                             self.training_file), 'wb') as f:
            torch.save(training_set, f)
        with open(
                os.path.join(self.root, self.processed_folder, self.test_file),
                'wb') as f:
            torch.save(test_set, f)

        print('Done!')
Code Example #10
File: segmentation.py Project: Surgical-ART/deepdrr
    def download(self):
        if self.model_path.exists():
            return

        download_url(self.url, self.model_path.parent, self.filename)
Code Example #11
File: datasets.py Project: daskol/svae
 def download(self, root: str):
     if not exists(join(root, FreyFace.FILENAME)):
         makedirs(root, exist_ok=True)
         download_url(FreyFace.URL, root, FreyFace.FILENAME, FreyFace.MD5)
Code Example #12
 def download(self, url, filename, file_md5):
     download_url(url, self.root, filename, file_md5)
Code Example #13
# wget https://www.usitc.gov/sites/default/files/tata/hts/hts_2021_preliminary_revision_2_json.json

# ./parse_hs_json.py  --input_hs_json  hts_2021_preliminary_revision_2_json.json  --output_csv  hts_2021_preliminary_revision_2_json.csv

import os
import sys
#from argparse import ArgumentParser
import pandas as pd
from torchvision.datasets.utils import download_url
import re
import json

# config.json may contain '#'-prefixed comment lines; strip them before JSON parsing
with open('config.json', 'r') as f:
    config = json.loads(re.sub(r'#.*?\n', '', f.read()))

download_url(config['hts_url'], '.')

#parser = ArgumentParser(add_help=True)
#parser.add_argument('--input_hs_json', type=str, required=True, help='Input Json HS file')
#parser.add_argument('--output_csv', type=str, required=True, help='Output CSV file')


class obj:

    # constructor
    def __init__(self, dict1):
        self.__dict__.update(dict1)
        self.child = []
        self.id = None

    def add_child(self, o):
        self.child.append(o)
Code Example #14
 def download(self):
     if self._check_integrity():
         print('Files already downloaded and verified')
         return
     download_url(self.url, self.root, filename=self.filename, md5=self.tgz_md5)
Code Example #15
    def download(self):
        """Download the SmallNORB data if it doesn't exist in processed_folder already."""
        import gzip

        if self._check_exists():
            return

        # check if already extracted and verified
        if self._check_integrity():
            print('Files already downloaded and verified')
        else:
            # download and extract
            for file_dict in self._flat_data_files():
                url = self.dataset_root + file_dict["name"] + '.gz'
                filename = file_dict["name"]
                gz_filename = filename + '.gz'
                md5 = file_dict["md5_gz"]
                fpath = os.path.join(self.root, self.raw_folder, filename)
                gz_fpath = fpath + '.gz'

                # download the compressed file if it is not already present and verified
                download_url(url, os.path.join(self.root, self.raw_folder),
                             gz_filename, md5)

                print('# Extracting data {}\n'.format(filename))

                with open(fpath, 'wb') as out_f, \
                        gzip.GzipFile(gz_fpath) as zip_f:
                    out_f.write(zip_f.read())

                os.unlink(gz_fpath)

        # process and save as torch files
        print('Processing...')

        # create processed folder
        try:
            os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        # read train files
        left_train_img, right_train_img = self._read_image_file(
            self.data_files["train"]["dat"]["name"])
        train_info = self._read_info_file(
            self.data_files["train"]["info"]["name"])
        train_label = self._read_label_file(
            self.data_files["train"]["cat"]["name"])

        # read test files
        left_test_img, right_test_img = self._read_image_file(
            self.data_files["test"]["dat"]["name"])
        test_info = self._read_info_file(
            self.data_files["test"]["info"]["name"])
        test_label = self._read_label_file(
            self.data_files["test"]["cat"]["name"])

        # save training files
        self._save(left_train_img, "{}_left".format(self.train_image_file))
        self._save(right_train_img, "{}_right".format(self.train_image_file))
        self._save(train_label, self.train_label_file)
        self._save(train_info, self.train_info_file)

        # save test files
        self._save(left_test_img, "{}_left".format(self.test_image_file))
        self._save(right_test_img, "{}_right".format(self.test_image_file))
        self._save(test_label, self.test_label_file)
        self._save(test_info, self.test_info_file)

        print('Done!')
Code Example #16
import os
import torch
import torchvision
from torchvision.datasets.utils import download_url
import zipfile

train_path = 'train'
dl_file = 'dl2018-image-proj.zip'
dl_url = 'https://users.aalto.fi/mvsjober/misc/'

zip_path = os.path.join(train_path, dl_file)
if not os.path.isfile(zip_path):
    download_url(dl_url + dl_file, root=train_path, filename=dl_file, md5=None)

with zipfile.ZipFile(zip_path) as zip_f:
    zip_f.extractall(train_path)
    #os.unlink(zip_path)

import pandas as pd
import glob
from sklearn.preprocessing import OneHotEncoder
import platform


# Create an array with tuple(img, label) pairs from the annotations txt files.
files = glob.glob("./train/annotations/*")
labels = []
for name in files:
    try:
        with open(name) as f:
            if platform.system() == "Windows":
Code Example #17
File: data_loader.py Project: tom-1221/VIBI
    def download(self):
        """Download the MNIST data if it doesn't exist in processed_folder already."""
        #from six.moves import urllib
        import gzip

        if self._check_exists():
            return

        # download files
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
            os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        for url in self.urls:
            filename = url.rpartition('/')[2]
            file_path = os.path.join(self.root, self.raw_folder, filename)
            download_url(url, root=os.path.join(self.root, self.raw_folder),
                         filename=filename, md5=None)
            with open(file_path.replace('.gz', ''), 'wb') as out_f, \
                    gzip.GzipFile(file_path) as zip_f:
                out_f.write(zip_f.read())
            os.unlink(file_path)

        # process and save as torch files
        print('Processing...')

        training_valid_set = (
            read_image_file(os.path.join(self.root, self.raw_folder, 'train-images-idx3-ubyte')),
            read_label_file(os.path.join(self.root, self.raw_folder, 'train-labels-idx1-ubyte'))
        )
        
        # Define the indices
        indices = list(range(len(training_valid_set[0])))
        if os.path.exists(os.path.join(self.root, self.processed_folder, 'valid_idx.npy')):
            valid_idx = np.load(os.path.join(self.root, self.processed_folder, 'valid_idx.npy'))
        else: 
            valid_idx = np.random.choice(indices, size = 10000, replace = False)
            np.save(os.path.join(self.root, self.processed_folder, 'valid_idx.npy'), valid_idx)
        train_idx = list(set(indices) - set(valid_idx))
        
        training_set = (
            training_valid_set[0][train_idx],
            training_valid_set[1][train_idx] 
        )
        valid_set = (
            training_valid_set[0][valid_idx],
            training_valid_set[1][valid_idx] 
        )
        test_set = (
            read_image_file(os.path.join(self.root, self.raw_folder, 't10k-images-idx3-ubyte')),
            read_label_file(os.path.join(self.root, self.raw_folder, 't10k-labels-idx1-ubyte'))
        )
        with open(os.path.join(self.root, self.processed_folder, self.training_file), 'wb') as f:
            torch.save(training_set, f)
        with open(os.path.join(self.root, self.processed_folder, self.valid_file), 'wb') as f:
            torch.save(valid_set, f)
        with open(os.path.join(self.root, self.processed_folder, self.test_file), 'wb') as f:
            torch.save(test_set, f)

        print('Done!')
Code Example #18
File: dataset.py Project: yukichou/TransFG
 def _download(self):
     for url, filename, md5 in self.file_list.values():
         download_url(url, root=self.root, filename=filename)
         if not check_integrity(os.path.join(self.root, filename), md5):
             raise RuntimeError("File not found or corrupted.")
Code Example #19
    def __init__(self,
                 root,
                 download=False,
                 image_set='train',
                 transforms=None):
        """
        Camvid dataset:https://course.fast.ai/datasets
        or simply wget https://s3.amazonaws.com/fast-ai-imagelocal/camvid.tgz

        Args:
            data_path: path to dataset folder
            image_set: train datset or validation dataset, 'train', or 'val'
            transforms: data augmentations
        """
        self._image_set = image_set
        self.transforms = transforms
        self._md5 = '2e796d442fe723192014ace89a1515b1'
        self._url = 'https://s3.amazonaws.com/fast-ai-imagelocal/camvid.tgz'
        self._filename = 'camvid.tgz'
        self._root = root

        if download:
            download_url(self._url, self._root, self._filename, md5=self._md5)

        self._label_IDs = {
            # Sky
            'Sky': 'Sky',

            # Building
            'Bridge': 'Building',
            'Building': 'Building',
            'Wall': 'Building',
            'Tunnel': 'Building',
            'Archway': 'Building',

            # Pole
            'Column_Pole': 'Pole',
            'TrafficCone': 'Pole',

            # Road
            'Road': 'Road',
            'LaneMkgsDriv': 'Road',
            'LaneMkgsNonDriv': 'Road',

            # Pavement
            'Sidewalk': 'Pavement',
            'ParkingBlock': 'Pavement',
            'RoadShoulder': 'Pavement',

            # Tree
            'Tree': 'Tree',
            'VegetationMisc': 'Tree',

            # SignSymbol
            'SignSymbol': 'SignSymbol',
            'Misc_Text': 'SignSymbol',
            'TrafficLight': 'SignSymbol',

            # Fence
            'Fence': 'Fence',

            # Car
            'Car': 'Car',
            'SUVPickupTruck': 'Car',
            'Truck_Bus': 'Car',
            'Train': 'Car',
            'OtherMoving': 'Car',

            # Pedestrian
            'Pedestrian': 'Pedestrian',
            'Child': 'Pedestrian',
            'CartLuggagePram': 'Pedestrian',
            'Animal': 'Pedestrian',

            # Bicyclist
            'Bicyclist': 'Bicyclist',
            'MotorcycleScooter': 'Bicyclist',

            #Void
            'Void': 'Void',
        }

        self.class_names = [
            'Sky', 'Building', 'Pole', 'Road', 'Pavement', 'Tree',
            'SignSymbol', 'Fence', 'Car', 'Pedestrian', 'Bicyclist', 'Void'
        ]

        self.class_num = len(self.class_names)
        self.ignore_index = self.class_names.index('Void')

        if not os.path.exists(os.path.join(self._root, self._image_set)):
            with tarfile.open(os.path.join(self._root, self._filename),
                              "r") as tar:
                tar.extractall(path=self._root)

            with open(os.path.join(self._root, 'camvid', 'codes.txt')) as f:
                self._codes = [line.strip() for line in f.readlines()]
            print('grouping 32-class labels into 12 classes...')
            camvid_label_folder = os.path.join(self._root, 'camvid', 'labels',
                                               '**', '*.png')
            camvid_images_folder = os.path.join(self._root, 'camvid', 'images',
                                                '**', '*.png')
            for label_fp in glob.iglob(camvid_label_folder, recursive=True):
                label = cv2.imread(label_fp, -1)
                label = self._group_ids(label)
                cv2.imwrite(label_fp, label)

            with open(os.path.join(self._root, 'camvid', 'valid.txt')) as f:
                valids = [line.strip() for line in f.readlines()]

            image_pathes = []
            for image_fp in glob.iglob(camvid_images_folder, recursive=True):
                if self._image_set == 'train':
                    if os.path.basename(
                            image_fp
                    ) not in valids and 'test.txt' not in image_fp:
                        image_pathes.append(image_fp)
                elif self._image_set == 'val':
                    if os.path.basename(image_fp) in valids:
                        image_pathes.append(image_fp)

                else:
                    raise RuntimeError(
                        'image_set should only be one of train val')

            label_pathes = []
            for image_fp in image_pathes:
                basename = os.path.basename(image_fp)
                dirname = os.path.dirname(image_fp)
                sub_folder = os.path.dirname(dirname)
                dirname = os.path.join(sub_folder, 'labels')
                basename = basename.replace('.png', '_P.png')
                label_pathes.append(os.path.join(dirname, basename))

            image_pathes.extend(label_pathes)
            # create lmdb dataset
            print(
                'Writing {} data into lmdb format to accelerate data loading'
                .format(self._image_set))
            self._create_lmdb(os.path.join(self._root, self._image_set),
                              image_pathes)
            print('Done...')
            shutil.rmtree(os.path.join(self._root, 'camvid'))

        lmdb_path = os.path.join(self._root, self._image_set)
        self._env = lmdb.open(lmdb_path,
                              map_size=1099511627776,
                              readonly=True,
                              lock=False)

        with self._env.begin(write=False) as txn:
            self._image_names = [
                key.decode()
                for key in txn.cursor().iternext(keys=True, values=False)
                if '_P' not in key.decode()
            ]
Code Example #20
 def download(self):
     download_url(self.url, self.raw_dir)
Code Example #21
import tarfile
from torch.utils.data import random_split
from torchvision.datasets.utils import download_url
import matplotlib.pyplot as plt
from scipy.io import loadmat
import pandas as pd
from torchvision import transforms
import torchvision.models as models
from skimage import io
from tqdm import tqdm
import time
from IPython.display import display

# %matplotlib inline

# Download the training dataset
dataset_url = "http://imagenet.stanford.edu/internal/car196/car_ims.tgz"
download_url(dataset_url, '.')

# Extract from archive
with tarfile.open('./car_ims.tgz', 'r:gz') as tar:
    tar.extractall(path='./data/')

# Download DevKit https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz
devkit_dataset_url = "https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz"
download_url(devkit_dataset_url, '.')

# Extract from archive
with tarfile.open('./car_devkit.tgz', 'r:gz') as tar:
    tar.extractall(path='./data/devkit')

label_dataset_url = "http://imagenet.stanford.edu/internal/car196/cars_annos.mat"
download_url(label_dataset_url, './data')
Code Example #22
        train = os.path.join(mit67, 'train')
        test = os.path.join(mit67, 'test')
        meta = os.path.join(mit67, 'meta')

        os.makedirs(mit67, exist_ok=True)
        os.makedirs(train, exist_ok=True)
        os.makedirs(test, exist_ok=True)
        os.makedirs(meta, exist_ok=True)

        # this step will create folder mit67/Images
        # which has all the images for each class in its own subfolder
        download(mit67)

        # download the csv files for the train and test split
        # from 'NAS Evaluation is Frustrating' repo
        # note that download_url doesn't work in vscode debug mode
        test_file_url = 'https://raw.githubusercontent.com/antoyang/NAS-Benchmark/master/data/MIT67_test.csv'
        train_file_urls = [
            'https://raw.githubusercontent.com/antoyang/NAS-Benchmark/master/data/MIT67_train1.csv',
            'https://raw.githubusercontent.com/antoyang/NAS-Benchmark/master/data/MIT67_train2.csv',
            'https://raw.githubusercontent.com/antoyang/NAS-Benchmark/master/data/MIT67_train3.csv',
            'https://raw.githubusercontent.com/antoyang/NAS-Benchmark/master/data/MIT67_train4.csv'
        ]

        download_url(test_file_url, meta, filename=None, md5=None)

        for tu in train_file_urls:
            download_url(tu, meta, filename=None, md5=None)

        prepare_data(mit67)
Code Example #23
import torch.nn as nn
from torch.autograd import Variable
import torchvision.models as models
from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import torch.optim as optim
import os
import shutil
import random
import tarfile
from torchvision.datasets.utils import download_url, list_dir, list_files
from torchvision.datasets import ImageFolder
caltech256_url_ = 'http://vision.caltech.edu/Image_Datasets/Caltech256/256_ObjectCategories.tar'
download_url(caltech256_url_, './data')


def extract_tar(filename):
    tar = tarfile.open(filename)
    tar.extractall('./data')
    tar.close()


def create_data(source_dir, target_dir, num_sample):
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    for files in os.listdir(source_dir):
        source_dir_0 = os.path.join(source_dir, files)
        target_dir_0 = os.path.join(target_dir, files)
        if not os.path.exists(target_dir_0):
Code Example #24
import tarfile
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as tt
from torch.utils.data import random_split
from torchvision.utils import make_grid
import matplotlib.pyplot as plt


# Download the CIFAR-10 dataset
dataset_url = "http://files.fast.ai/data/cifar10.tgz"
download_url(dataset_url, '.')

# Extract from archive
with tarfile.open('./cifar10.tgz', 'r:gz') as tar:
    tar.extractall(path='./data')

data_dir = './data/cifar10'

# Data transforms (normalization & data augmentation)
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
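# (these are the commonly used per-channel mean/std statistics of the CIFAR-10 training set)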
train_tfms = tt.Compose([tt.RandomCrop(32, padding=4, padding_mode='reflect'), 
                         tt.RandomHorizontalFlip(), 
                         tt.ToTensor(), 
                         tt.Normalize(*stats,inplace=True)])
valid_tfms = tt.Compose([tt.ToTensor(), tt.Normalize(*stats)])
Code Example #25
import os
import tarfile

from torchvision.datasets.utils import download_url


def download_extract(url, root, filename, md5):
    download_url(url, root, filename, md5)
    with tarfile.open(os.path.join(root, filename), "r") as tar:
        tar.extractall(path=root)
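
A minimal standalone call of this helper might look like the following sketch; the URL, filename, and md5 here are placeholders rather than values from the source (Code Example #2 shows the same helper used inside a dataset class):

# hypothetical usage -- replace the placeholder URL and filename with a real archive
download_extract("https://example.com/dataset.tgz", "./data",
                 "dataset.tgz", md5=None)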
Code Example #26
    def download(self):
        if self._check_exists():
            return

        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")

        os.makedirs(self.raw_folder, exist_ok=True)
        os.makedirs(self.processed_folder, exist_ok=True)

        # Download outcome data
        for url in self.outcome_urls:
            filename = url.rpartition('/')[2]
            download_url(url, self.raw_folder, filename, None)

            txtfile = os.path.join(self.raw_folder, filename)
            with open(txtfile) as f:
                lines = f.readlines()
                outcomes = {}
                for l in lines[1:]:
                    l = l.rstrip().split(',')
                    record_id, labels = l[0], np.array(l[1:]).astype(float)
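                    # (in the PhysioNet 2012 outcome files these columns are
                    # SAPS-I, SOFA, Length_of_stay, Survival, In-hospital_death)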
                    outcomes[record_id] = torch.Tensor(labels).to(self.device)

                # save the full outcomes dict (record_id -> labels); saving the
                # loop variable `labels` would keep only the last record read
                torch.save(
                    outcomes,
                    os.path.join(self.processed_folder,
                                 filename.split('.')[0] + '.pt'))

        for url in self.urls:
            filename = url.rpartition('/')[2]
            download_url(url, self.raw_folder, filename, None)
            tar = tarfile.open(os.path.join(self.raw_folder, filename), "r:gz")
            tar.extractall(self.raw_folder)
            tar.close()

            print('Processing {}...'.format(filename))

            dirname = os.path.join(self.raw_folder, filename.split('.')[0])
            patients = []
            total = 0
            for txtfile in os.listdir(dirname):
                record_id = txtfile.split('.')[0]
                with open(os.path.join(dirname, txtfile)) as f:
                    lines = f.readlines()
                    prev_time = 0
                    tt = [0.]
                    vals = [torch.zeros(len(self.params)).to(self.device)]
                    mask = [torch.zeros(len(self.params)).to(self.device)]
                    nobs = [torch.zeros(len(self.params))]
                    for l in lines[1:]:
                        total += 1
                        time, param, val = l.split(',')
                        # Time in hours
                        time = float(time.split(':')[0]) + float(
                            time.split(':')[1]) / 60.
                        # round up the time stamps (up to 6 min by default)
                        # used for speed -- we actually don't need to quantize it in Latent ODE
                        time = round(
                            time / self.quantization) * self.quantization

                        if time != prev_time:
                            tt.append(time)
                            vals.append(
                                torch.zeros(len(self.params)).to(self.device))
                            mask.append(
                                torch.zeros(len(self.params)).to(self.device))
                            nobs.append(
                                torch.zeros(len(self.params)).to(self.device))
                            prev_time = time

                        if param in self.params_dict:
                            #vals[-1][self.params_dict[param]] = float(val)
                            n_observations = nobs[-1][self.params_dict[param]]
                            if self.reduce == 'average' and n_observations > 0:
                                prev_val = vals[-1][self.params_dict[param]]
                                new_val = (prev_val * n_observations +
                                           float(val)) / (n_observations + 1)
                                vals[-1][self.params_dict[param]] = new_val
                            else:
                                vals[-1][self.params_dict[param]] = float(val)
                            mask[-1][self.params_dict[param]] = 1
                            nobs[-1][self.params_dict[param]] += 1
                        else:
                            assert param == 'RecordID', 'Read unexpected param {}'.format(
                                param)
                tt = torch.tensor(tt).to(self.device)
                vals = torch.stack(vals)
                mask = torch.stack(mask)

                labels = None
                if record_id in outcomes:
                    # Only training set has labels
                    labels = outcomes[record_id]
                    # Out of 5 label types provided for Physionet, take only the last one -- mortality
                    labels = labels[4]

                patients.append((record_id, tt, vals, mask, labels))

            torch.save(
                patients,
                os.path.join(
                    self.processed_folder,
                    filename.split('.')[0] + "_" + str(self.quantization) +
                    '.pt'))

        print('Done!')