Code example #1
def svhn(args):
    """
    Fetches the SVHN dataset and prepares it (in a DeepDIVA-friendly format) at the
    location specified on the file system.

    Parameters
    ----------
    args : argparse.Namespace
        Arguments necessary to run this routine. In particular, it is necessary to
        provide output_folder as a string containing the path where the dataset
        will be downloaded.

    Returns
    -------
        None
    """
    # Use torchvision to download the dataset
    torchvision.datasets.SVHN(root=args.output_folder,
                              split='train',
                              download=True)
    torchvision.datasets.SVHN(root=args.output_folder,
                              split='test',
                              download=True)

    # Load the data into memory
    train = _loadmat(os.path.join(args.output_folder, 'train_32x32.mat'))
    train_data, train_labels = train['X'], train['y'].astype(
        np.int64).squeeze()
    # SVHN stores the digit '0' as class 10; remap it to label 0
    np.place(train_labels, train_labels == 10, 0)
    # Reorder from (H, W, C, N) to (N, H, W, C)
    train_data = np.transpose(train_data, (3, 0, 1, 2))

    test = _loadmat(os.path.join(args.output_folder, 'test_32x32.mat'))
    test_data, test_labels = test['X'], test['y'].astype(np.int64).squeeze()
    np.place(test_labels, test_labels == 10, 0)
    test_data = np.transpose(test_data, (3, 0, 1, 2))

    # Make output folders
    dataset_root = os.path.join(args.output_folder, 'SVHN')
    train_folder = os.path.join(dataset_root, 'train')
    test_folder = os.path.join(dataset_root, 'test')

    make_folder_if_not_exists(dataset_root)
    make_folder_if_not_exists(train_folder)
    make_folder_if_not_exists(test_folder)

    def _write_data_to_folder(arr, labels, folder):
        for i, (img, label) in enumerate(zip(arr, labels)):
            dest = os.path.join(folder, str(label))
            make_folder_if_not_exists(dest)
            Image.fromarray(img).save(os.path.join(dest, str(i) + '.png'))

    # Write the images to the folders
    _write_data_to_folder(train_data, train_labels, train_folder)
    _write_data_to_folder(test_data, test_labels, test_folder)

    os.remove(os.path.join(args.output_folder, 'train_32x32.mat'))
    os.remove(os.path.join(args.output_folder, 'test_32x32.mat'))

    split_dataset(dataset_folder=dataset_root, split=0.2, symbolic=False)
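The snippet above relies on module-level imports from its repository. A minimal sketch of what they would look like is below (an assumption, not the original module header; make_folder_if_not_exists and split_dataset are DeepDIVA-internal helpers and are taken to be in scope already):

# Presumed module-level imports for the snippet above (a sketch). The
# _loadmat alias follows the pattern shown in Code example #2.
import os

import numpy as np
import torchvision
from PIL import Image
from scipy.io import loadmat as _loadmat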
Code example #2
def loadmat(matf, **kwargs):
    from scipy.io import loadmat as _loadmat
    kwargs.setdefault('verify_compressed_data_integrity', True)
    kwargs.setdefault('squeeze_me', True)
    verbose = kwargs.pop('verbose', True)
    if verbose:
        print('loading', matf)
    return _loadmat(matf, **kwargs)
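A hypothetical call to the wrapper above ('sample.mat' is a placeholder path): the two setdefault calls fill in defaults only when the caller has not supplied them, and verbose is popped so it never reaches scipy.io.loadmat.

# Hypothetical usage of the wrapper above; 'sample.mat' is a placeholder.
data = loadmat('sample.mat', verbose=False)  # dict of MATLAB variables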
Code example #3
    def load(self):
        """
        Load all Matlab files from paths.

        Returns:
            Generator of lists of head positions - (X, Y) tuples.
        """
        for path in self.gt_paths:
            yield self.getter(_loadmat(path))
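Here gt_paths and getter are attributes of the surrounding class. A plausible getter for this interface is sketched below; it is an illustrative assumption, and the key name 'head_positions' is a placeholder for whatever the ground-truth .mat files actually use.

# Hypothetical getter (illustration only; 'head_positions' is a placeholder
# key): pull (X, Y) tuples out of the dict returned by _loadmat.
def getter(mat):
    return [tuple(p) for p in mat['head_positions']]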
Code example #4
File: colormap.py Project: harrispirie/stmpy
def _make_STMView_colormap(fileName, name='my_cmap'):
    if fileName.endswith('.mat'):
        matFile = _loadmat(_path + fileName)
        for key in matFile:
            if key not in ['__version__', '__header__', '__globals__']:
                return _LSC.from_list(name, matFile[key])
    elif fileName.endswith('.txt'):
        txtFile = _np.loadtxt(_path + fileName)
        return _LSC.from_list(name, txtFile)
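The leading-underscore names above are module-level aliases. A sketch of what they presumably stand for (an assumption based on the matplotlib and numpy APIs; _path is a placeholder the real module points at its data directory):

# Presumed aliases for the snippet above (assumption; stmpy defines its own).
import numpy as _np
from matplotlib.colors import LinearSegmentedColormap as _LSC
from scipy.io import loadmat as _loadmat
_path = ''  # placeholder for the module's data directory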
Code example #5
File: power.py Project: ylin00/DNPLab
def importPower(path, filename=''):
    '''
    Import a powers file (.mat or .csv).
    '''
    fullPath = path + filename  # note: path is expected to end with a separator

    if fullPath[-4:] == '.mat':
        rawDict = _loadmat(fullPath)
        t = rawDict['timelist'].reshape(-1)
        p = rawDict['powerlist'].reshape(-1)
    elif fullPath[-4:] == '.csv':
        raw = _np.loadtxt(fullPath, delimiter=',', skiprows=1)
        t = raw[:, 0].reshape(-1)
        p = raw[:, 1].reshape(-1)
    else:
        print('Could not identify power data type')
        return

    return t, p
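A hypothetical call (both arguments are placeholder paths). Because path and filename are concatenated directly rather than joined, path should end with a separator:

# Hypothetical usage; placeholder paths.
t, p = importPower('/data/experiment/', 'power.mat')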
Code example #6
File: emapex.py Project: jessecusack/emapex
    def __allprofs_init(self, filepath, floatID, verbose):
        # Loaded data is a dictionary.
        data = _loadmat(filepath, squeeze_me=True)

        isFloat = data.pop('flid') == floatID
        del data['ar']
        self.hpid = data.pop('hpid')[isFloat]
        if self.hpid.size == 0:
            raise RuntimeError('There appear to be no profiles for float {} in'
                               ' {}'.format(floatID, filepath))

        # Load the data!
        for key in data.keys():

            d = np.ndim(data[key])

            if d < 1 or d > 2 or '__' in key:
                if verbose:
                    print("* Skipping: {}.".format(key))
                continue
            elif d == 1:
                setattr(self, key, data[key][isFloat])
            elif d == 2:
                setattr(self, key, data[key][:, isFloat])
            else:
                if verbose:
                    print("* Don't know what to do with {}, skipping".format(key))

            if verbose:
                print("  Loaded: {}.".format(key))

        print("All numerical data appears to have been loaded successfully.\n")

        print("Creating array of half profiles.\n")

        self.Profiles = np.array([Profile(self, h) for h in self.hpid])
Code example #7
def miml(args):
    """
    Fetches the Multi-Instance Multi-Label Image Dataset and prepares it (in a
    DeepDIVA-friendly format) on the file system. Dataset available at:
    http://lamda.nju.edu.cn/data_MIMLimage.ashx

    Parameters
    ----------
    args : argparse.Namespace
        Arguments necessary to run this routine. In particular, it is necessary to
        provide output_folder as a string containing the path where the dataset
        will be downloaded.

    Returns
    -------
        None
    """
    # Download the files
    url = 'http://lamda.nju.edu.cn/files/miml-image-data.rar'
    if not os.path.exists(
            os.path.join(args.output_folder, 'miml-image-data.rar')):
        print('Downloading file!')
        filename = wget.download(url, out=args.output_folder)
    else:
        print('File already downloaded!')
        filename = os.path.join(args.output_folder, 'miml-image-data.rar')

    # Extract the files
    path_to_rar = filename
    path_to_output = os.path.join(args.output_folder, 'tmp_miml')
    rarfile.RarFile(path_to_rar).extractall(path_to_output)
    path_to_rar = os.path.join(path_to_output, 'original.rar')
    rarfile.RarFile(path_to_rar).extractall(path_to_output)
    path_to_rar = os.path.join(path_to_output, 'processed.rar')
    rarfile.RarFile(path_to_rar).extractall(path_to_output)
    print('Extracted files...')

    # Load the mat file
    mat = _loadmat(os.path.join(path_to_output, 'miml data.mat'))
    targets = mat['targets'].T
    classes = [item[0][0] for item in mat['class_name']]
    # Add filename at 0-index to correctly format the CSV headers
    classes.insert(0, 'filename')

    # Get list of all image files in the folder
    images = [
        item
        for item in _get_all_files_in_folders_and_subfolders(path_to_output)
        if item.endswith('jpg')
    ]
    images = sorted(images,
                    key=lambda e: int(os.path.basename(e).split('.')[0]))

    # Make splits
    train_data, test_data, train_labels, test_labels = _train_test_split(
        images, targets, test_size=0.2, random_state=42)
    train_data, val_data, train_labels, val_labels = _train_test_split(
        train_data, train_labels, test_size=0.2, random_state=42)

    # print('Size of splits\ntrain:{}\nval:{}\ntest:{}'.format(len(train_data),
    #                                                     len(val_data),
    #                                                     len(test_data)))

    # Make output folders
    dataset_root = os.path.join(args.output_folder, 'MIML')
    train_folder = os.path.join(dataset_root, 'train')
    val_folder = os.path.join(dataset_root, 'val')
    test_folder = os.path.join(dataset_root, 'test')

    make_folder_if_not_exists(dataset_root)
    make_folder_if_not_exists(train_folder)
    make_folder_if_not_exists(val_folder)
    make_folder_if_not_exists(test_folder)

    def _write_data_to_folder(data, labels, folder, classes):
        dest = os.path.join(folder, 'images')
        make_folder_if_not_exists(dest)
        for image, label in zip(data, labels):
            shutil.copy(image, dest)

        rows = np.column_stack(
            ([os.path.join('images', os.path.basename(item))
              for item in data], labels))
        rows = sorted(rows,
                      key=lambda e: int(e[0].split('/')[1].split('.')[0]))
        output_csv = pd.DataFrame(rows)
        output_csv.to_csv(os.path.join(folder, 'labels.csv'),
                          header=classes,
                          index=False)
        return

    # Write the images to the correct folders
    print('Writing the data to the filesystem')
    _write_data_to_folder(train_data, train_labels, train_folder, classes)
    _write_data_to_folder(val_data, val_labels, val_folder, classes)
    _write_data_to_folder(test_data, test_labels, test_folder, classes)

    os.remove(filename)
    shutil.rmtree(path_to_output)
    print('All done!')
    return
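Two notes on the routine above: the rarfile package typically delegates extraction to an external tool (unrar or similar), which must be available on PATH, and args is accessed by attribute, so any namespace-like object works. A hypothetical invocation:

# Hypothetical invocation; the output folder is a placeholder path.
from argparse import Namespace
miml(Namespace(output_folder='/tmp/datasets'))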
Code example #8
def _make_STMView_colormap(fileName):
    matFile = _loadmat(_path + fileName)
    for key in matFile:
        if key not in ['__version__', '__header__', '__globals__']:
            return _ListedColormap(matFile[key])
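As in Code example #4, the underscore names are presumably module aliases, this time with matplotlib's ListedColormap (an assumption; stmpy defines its own):

# Presumed aliases for the snippet above (assumption).
from matplotlib.colors import ListedColormap as _ListedColormap
from scipy.io import loadmat as _loadmat
_path = ''  # placeholder for the module's data directory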
Code example #9
File: emapex.py Project: jessecusack/emapex
    def __dir_init(self, dirpath, floatID, verbose):
        # This block searches the directory tree for all the relevant files and
        # puts them in a dictionary organised by hpid number.
        # ctd, efp, gps, mis, vel, vit
        filesdict = {}
        mis_file = None
        gps_file = None
        single_mis_file = False
        single_gps_file = False
        searchstr = '*{}*vel.mat'.format(floatID)
        for root, dirnames, filenames in _os.walk(dirpath):
            for filename in _fnmatch.filter(filenames, searchstr):
                nameparts = filename.split('-')

                try:
                    hpid = int(nameparts[2])
                except ValueError:
                    if nameparts[2] == 'mis.mat':
                        single_mis_file = True
                        mis_file = _os.path.join(root, filename)
                        continue
                    elif nameparts[2] == 'gps.mat':
                        single_gps_file = True
                        gps_file = _os.path.join(root, filename)
                        continue

                filetype = nameparts[3].split('.')[0]
                fullname = _os.path.join(root, filename)
                if hpid in filesdict.keys():
                    filesdict[hpid][filetype] = fullname
                else:
                    filesdict[hpid] = {filetype: fullname}

        self.hpid = np.array(list(filesdict.keys()))  # list() needed on Python 3
        Nprofiles = self.hpid.size

        # Work out size of arrays required.
        pad_ctd = 0
        pad_ef = 0
        for hp in self.hpid:
            velfile = filesdict[hp]['vel']
            veldata = _loadmat(velfile, squeeze_me=True,
                               variable_names=['ctd_mlt', 'efp_mlt'])
            pad_ctd = max(pad_ctd, np.asarray(veldata['ctd_mlt']).size)
            pad_ef = max(pad_ef, np.asarray(veldata['efp_mlt']).size)

        # CTD attributes.
        ctd_keys = ['Pctd', 'T', 'S', 'ctd_mlt', 'pc_ctd']
        ctd_attrs = ['P', 'T', 'S', 'UTC', 'ppos']
        # ef attributes.
        ef_keys = ['U1', 'U2', 'V1', 'V2', 'Pef', 'efp_mlt']
        ef_attrs = ['U1', 'U2', 'V1', 'V2', 'Pef', 'UTCef']
        # Singleton attributes.
        s_keys = ['lon', 'lat', 'LON', 'LAT', 'MLT_GPS']
        s_attrs = ['lon', 'lat', 'lon_gps', 'lat_gps', 'utc_gps']

        names = ctd_keys + ef_keys + s_keys

        # Initialise arrays. (np.nan replaces the old np.NaN alias, which was
        # removed in NumPy 2.0.)
        for ctd_attr in ctd_attrs:
            setattr(self, ctd_attr, np.nan * np.zeros((pad_ctd, Nprofiles)))
        for ef_attr in ef_attrs:
            setattr(self, ef_attr, np.nan * np.zeros((pad_ef, Nprofiles)))
        for s_attr in s_attrs:
            setattr(self, s_attr, np.nan * np.zeros(Nprofiles))

        # Load vel data.
        for i, hp in enumerate(self.hpid):
            velfile = filesdict[hp]['vel']
            veldata = _loadmat(velfile, squeeze_me=True, variable_names=names)
            Nctd = np.asarray(veldata['ctd_mlt']).size
            Nef = np.asarray(veldata['efp_mlt']).size

            for ctd_key, ctd_attr in zip(ctd_keys, ctd_attrs):
                if Nctd < 2:
                    continue
                getattr(self, ctd_attr)[:Nctd, i] = veldata[ctd_key]

            for ef_key, ef_attr in zip(ef_keys, ef_attrs):
                if Nef < 2:
                    continue
                getattr(self, ef_attr)[:Nef, i] = veldata[ef_key]

            for s_key, s_attr in zip(s_keys, s_attrs):
                getattr(self, s_attr)[i] = veldata[s_key]

        print("All numerical data appears to have been loaded successfully.\n")

        print("Creating array of half profiles.\n")

        self.Profiles = np.array([Profile(self, h) for h in self.hpid])
Code example #10
File: emapex.py Project: jessecusack/emapex
def what_floats_are_in_here(fname):
    """Finds all unique float ID numbers from a given allprofs##.mat file."""
    fs = _loadmat(fname, squeeze_me=True, variable_names='flid')['flid']
    return np.unique(fs[~np.isnan(fs)])
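A hypothetical call ('allprofs11.mat' is a placeholder for an actual allprofs##.mat file); the ~np.isnan mask drops NaN entries before np.unique collapses duplicates:

# Hypothetical usage; the filename is a placeholder.
float_ids = what_floats_are_in_here('allprofs11.mat')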