Example #1
def _make_npz(path, urls):
    x_url, y_url = urls
    x_path = download.cached_download(x_url)
    y_path = download.cached_download(y_url)

    with gzip.open(x_path, 'rb') as imgpath, gzip.open(y_path,
                                                       'rb') as lblpath:
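        # The IDX label file has an 8-byte header and the image file a
        # 16-byte header; each image is 28 x 28 = 784 bytes.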
        labels = numpy.frombuffer(lblpath.read(), dtype=numpy.uint8, offset=8)
        images = numpy.frombuffer(imgpath.read(), dtype=numpy.uint8,
                                  offset=16).reshape(len(labels), 784)
    numpy.savez_compressed(path, x=images, y=labels)
    return {'x': images, 'y': labels}
def download(base_path, name, url, img_type):
    cache_root = "{0}/{1}/cache".format(base_path, name)
    print("Caching to {0}".format(cache_root))
    set_cache(cache_root)

    archive_path = dl.cached_download(url)
    fileOb = zipfile.ZipFile(archive_path, mode='r')
    names = fileOb.namelist()

    try:
        os.makedirs(cache_root)
    except OSError:
        if not os.path.isdir(cache_root):
            raise

    data, label = [], []
    try:
        for name in names:
            path = cache_root + name
            if bool(re.search('obj', name)):
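                # Entry names look like '.../objN__M.png'; the object index N
                # becomes the class label.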
                obj = fileOb.extract(name, path=path)
                img = imreadBGR(obj, img_type)
                data.append(img)
                label.append(int(name.split("__")[0].split("/obj")[1]))
    finally:
        print("processed {0} images".format(len(names)))
        #shutil.rmtree(cache_root)
    #data = np.stack(data, axis=0).transpose([0, 3, 1, 2])
    label = np.array(label).astype(np.uint8)
    return data, label
Example #3
def download_and_extract_qm9(save_filepath):
    logger = getLogger(__name__)
    logger.warning('Extracting QM9 dataset, it takes time...')
    download_file_path = download.cached_download(download_url)
    tf = tarfile.open(download_file_path, 'r')
    temp_dir = tempfile.mkdtemp()
    tf.extractall(temp_dir)
    file_re = os.path.join(temp_dir, '*.xyz')
    file_pathes = glob.glob(file_re)
    # Make sure the order is sorted
    file_pathes.sort()
    ls = []
    for path in tqdm(file_pathes):
        with open(path, 'r') as f:
            data = [line.strip() for line in f]

        num_atom = int(data[0])
        properties = list(map(float, data[1].split('\t')[1:]))
        smiles = data[3 + num_atom].split('\t')
        new_ls = smiles + properties
        ls.append(new_ls)

    df = pandas.DataFrame(ls, columns=_smiles_column_names + _label_names)
    df.to_csv(save_filepath)
    shutil.rmtree(temp_dir)
    return True
Example #4
    def creator(path):
        archive_path = download.cached_download(url)

        train_x = numpy.empty((5, 10000, 3072), dtype=numpy.uint8)
        train_y = numpy.empty((5, 10000), dtype=numpy.uint8)
        test_y = numpy.empty(10000, dtype=numpy.uint8)

        dir_name = '{}-batches-py'.format(name)

        with tarfile.open(archive_path, 'r:gz') as archive:
            # training set
            for i in range(5):
                file_name = '{}/data_batch_{}'.format(dir_name, i + 1)
                d = pickle.load(archive.extractfile(file_name))
                train_x[i] = d['data']
                train_y[i] = d['labels']

            # test set
            file_name = '{}/test_batch'.format(dir_name)
            d = pickle.load(archive.extractfile(file_name))
            test_x = d['data']
            test_y[...] = d['labels']  # copy to array

        train_x = train_x.reshape(50000, 3072)
        train_y = train_y.reshape(50000)

        numpy.savez_compressed(path, train_x=train_x, train_y=train_y,
                               test_x=test_x, test_y=test_y)
        return {'train_x': train_x, 'train_y': train_y,
                'test_x': test_x, 'test_y': test_y}
def creator(cache_path):
    data_path = download.cached_download(data_url)
    shutil.copy(data_path, cache_path)

    global task_names
    assays, smiles, task_names = _load(data_path)
    return create_dataset(assays, smiles, smiles2fp)
    def __init__(self, pretrained_model='auto'):
        super(HyperFaceModel, self).__init__(
            conv1  = L.Convolution2D(3,   96, 11, stride=4, pad=0),
            conv1a = L.Convolution2D(96,  256, 4, stride=4, pad=0),
            conv2  = L.Convolution2D(96,  256, 5, stride=1, pad=2),
            conv3  = L.Convolution2D(256, 384, 3, stride=1, pad=1),
            conv3a = L.Convolution2D(384, 256, 2, stride=2, pad=0),
            conv4  = L.Convolution2D(384, 384, 3, stride=1, pad=1),
            conv5  = L.Convolution2D(384, 256, 3, stride=1, pad=1),
            conv_all = L.Convolution2D(768, 192, 1, stride=1, pad=0),
            fc_full  = L.Linear(6 * 6 * 192, 3072),
            fc_detection1  = L.Linear(3072, 512),
            fc_detection2  = L.Linear(512,  2),
            fc_landmarks1  = L.Linear(3072, 512),
            fc_landmarks2  = L.Linear(512,  42),
            fc_visibility1 = L.Linear(3072, 512),
            fc_visibility2 = L.Linear(512,  21),
            fc_pose1       = L.Linear(3072, 512),
            fc_pose2       = L.Linear(512,  3),
            fc_gender1     = L.Linear(3072, 512),
            fc_gender2     = L.Linear(512,  2),
        )

        # download pretrained weights
        if pretrained_model == 'auto':
            rospy.loginfo("Loading pretrained model. (This may take some minutes.)")
            url = 'https://jsk-ros-pkg.s3.amazonaws.com/chainer/hyperface_model_epoch_190.npz'
            load_npz(download.cached_download(url), self)
            rospy.loginfo("Model loaded")
        elif pretrained_model:
            rospy.loginfo("Loading pretrained model: %s" % pretrained_model)
            load_npz(pretrained_model, self)
            rospy.loginfo("Model loaded")
        else:
            rospy.logwarn("No pretrained model is loaded.")
Example #7
def download_and_extract_qm9(save_filepath):
    logger = getLogger(__name__)
    logger.warning('Extracting QM9 dataset, it takes time...')
    download_file_path = download.cached_download(download_url)
    tf = tarfile.open(download_file_path, 'r')
    temp_dir = tempfile.mkdtemp()
    tf.extractall(temp_dir)
    file_re = os.path.join(temp_dir, '*.xyz')
    file_pathes = glob.glob(file_re)
    # Make sure the order is sorted
    file_pathes.sort()
    ls = []
    for path in tqdm(file_pathes):
        with open(path, 'r') as f:
            data = [line.strip() for line in f]

        num_atom = int(data[0])
        properties = list(map(float, data[1].split('\t')[1:]))
        smiles = data[3 + num_atom].split('\t')
        new_ls = smiles + properties
        ls.append(new_ls)

    df = pandas.DataFrame(ls, columns=_smiles_column_names + _label_names)
    df.to_csv(save_filepath)
    shutil.rmtree(temp_dir)
    return True
Example #8
    def creator(path):
        archive_path = download.cached_download(url)

        train_x = numpy.empty((5, 10000, 3072), dtype=numpy.uint8)
        train_y = numpy.empty((5, 10000), dtype=numpy.uint8)
        test_y = numpy.empty(10000, dtype=numpy.uint8)

        dir_name = '{}-batches-py'.format(name)

        with tarfile.open(archive_path, 'r:gz') as archive:
            # training set
            for i in range(5):
                file_name = '{}/data_batch_{}'.format(dir_name, i + 1)
                d = pickle.load(archive.extractfile(file_name))
                train_x[i] = d['data']
                train_y[i] = d['labels']

            # test set
            file_name = '{}/test_batch'.format(dir_name)
            d = pickle.load(archive.extractfile(file_name))
            test_x = d['data']
            test_y[...] = d['labels']  # copy to array

        train_x = train_x.reshape(50000, 3072)
        train_y = train_y.reshape(50000)

        numpy.savez_compressed(path, train_x=train_x, train_y=train_y,
                               test_x=test_x, test_y=test_y)
        return {'train_x': train_x, 'train_y': train_y,
                'test_x': test_x, 'test_y': test_y}
Example #9
def _make_npz(path, url):
    _path = download.cached_download(url)
    raw = io.loadmat(_path)
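    # The MAT file stores the image array under key 'X' and the labels
    # under key 'y'.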
    images = raw["X"].astype(numpy.uint8)
    labels = raw["y"].astype(numpy.uint8)

    numpy.savez_compressed(path, x=images, y=labels)
    return {'x': images, 'y': labels}
Example #10
def _make_npz(path, url):
    _path = download.cached_download(url)
    raw = io.loadmat(_path)
    images = raw["X"].astype(numpy.uint8)
    labels = raw["y"].astype(numpy.uint8)

    numpy.savez_compressed(path, x=images, y=labels)
    return {'x': images, 'y': labels}
Example #11
def _creator(cached_file_path, sdffile, url):
    download_file_path = download.cached_download(url)

    with zipfile.ZipFile(download_file_path, 'r') as z:
        z.extract(sdffile)
    mol_supplier = Chem.SDMolSupplier(sdffile)
    shutil.move(sdffile, cached_file_path)
    return mol_supplier
Example #12
def download_and_extract_reddit(save_dirpath):
    # type: (str) -> bool
    print('downloading reddit dataset...')
    download_file_path = download.cached_download(download_url)
    print('extracting reddit dataset...')
    zf = ZipFile(download_file_path, 'r')
    zf.extractall(save_dirpath)
    return True
Example #13
def _make_npz(path_npz, url, model):
    path_caffemodel = download.cached_download(url)
    sys.stderr.write(
        'Now loading caffemodel (usually it takes a few minutes)\n')
    sys.stderr.flush()
    GoogLeNet.convert_caffemodel_to_npz(path_caffemodel, path_npz)
    npz.load_npz(path_npz, model)
    return model
Example #14
def _make_npz(path_npz, url, model):
    path_caffemodel = download.cached_download(url)
    sys.stderr.write(
        'Now loading caffemodel (usually it takes a few minutes)\n')
    sys.stderr.flush()
    VGGLayers.convert_caffemodel_to_npz(path_caffemodel, path_npz)
    npz.load_npz(path_npz, model)
    return model
Example #15
def download_and_extract_cora(save_dirpath):
    # type: (str) -> bool
    print('downloading cora dataset...')
    download_file_path = download.cached_download(download_url)
    print('extracting cora dataset...')
    tf = tarfile.open(download_file_path, 'r')
    tf.extractall(os.path.dirname(save_dirpath))
    return True
Example #16
def _load_words(url):
    path = download.cached_download(url)
    words = []
    with open(path) as words_file:
        for line in words_file:
            if line:
                words += line.strip().split()
                words.append('<eos>')
    return words
Example #17
def _load_words(url):
    path = download.cached_download(url)
    words = []
    with open(path) as words_file:
        for line in words_file:
            if line:
                words += line.strip().split()
                words.append('<eos>')
    return words
Example #18
def download_and_extract_zinc250k(save_filepath):
    logger = getLogger(__name__)
    logger.info('Extracting ZINC250k dataset...')
    download_file_path = download.cached_download(download_url)
    df = pandas.read_csv(download_file_path)
    # 'smiles' column contains '\n', need to remove it.
    df['smiles'] = df['smiles'].apply(_remove_new_line)
    df.to_csv(save_filepath, columns=_smiles_column_names + _label_names)
    return True
Example #19
def download_and_extract_zinc250k(save_filepath):
    logger = getLogger(__name__)
    logger.info('Extracting ZINC250k dataset...')
    download_file_path = download.cached_download(download_url)
    df = pandas.read_csv(download_file_path)
    # 'smiles' column contains '\n', need to remove it.
    df['smiles'] = df['smiles'].apply(_remove_new_line)
    df.to_csv(save_filepath, columns=_smiles_column_names + _label_names)
    return True
Example #20
    def __init__(self, n_layers):
        root = download.get_dataset_directory('pfnet/chainer/models/')
        caffemodel_path = os.path.join(
            root, 'ResNet-{}-model.caffemodel'.format(n_layers))
        if not os.path.exists(caffemodel_path):
            if n_layers == 50:
                cache_path = download.cached_download(self.URLS['resnet50'])
            elif n_layers == 101:
                cache_path = download.cached_download(self.URLS['resnet101'])
            elif n_layers == 152:
                cache_path = download.cached_download(self.URLS['resnet152'])
            shutil.move(cache_path, caffemodel_path)
        super(ResNet, self).__init__(os.path.basename(caffemodel_path),
                                     n_layers=n_layers)

        self._children.remove('fc6')
        del self.fc6
        del self.functions['fc6']
        del self.functions['prob']
        self.train = True
    def __init__(self, n_layers):
        root = download.get_dataset_directory('pfnet/chainer/models/')
        caffemodel_path = os.path.join(
            root, 'ResNet-{}-model.caffemodel'.format(n_layers))
        if not os.path.exists(caffemodel_path):
            if n_layers == 50:
                cache_path = download.cached_download(self.URLS['resnet50'])
            elif n_layers == 101:
                cache_path = download.cached_download(self.URLS['resnet101'])
            elif n_layers == 152:
                cache_path = download.cached_download(self.URLS['resnet152'])
            shutil.move(cache_path, caffemodel_path)
        super(ResNet, self).__init__(
            os.path.basename(caffemodel_path), n_layers=n_layers)

        self._children.remove('fc6')
        del self.fc6
        del self.functions['fc6']
        del self.functions['prob']
        self.train = True
Example #22
def download_pdbbind_time(url, save_filepath):
    """Download and caches PDBBind year table.

    Args:
        url(str): URL of year table
        save_filepath(str): filepath for year table

    Returns (bool): `True` if the download succeeds.
    """
    download_file_path = download.cached_download(url)
    shutil.move(download_file_path, save_filepath)
    return True
Example #23
def download_pdbbind_time(url, save_filepath):
    """Download and caches PDBBind year table.

    Args:
        url(str): URL of year table
        save_filepath(str): filepath for year table

    Returns (bool): `True` if the download succeeds.
    """
    download_file_path = download.cached_download(url)
    shutil.move(download_file_path, save_filepath)
    return True
Example #24
def _make_npz(path, urls):
    x_url, y_url = urls
    x_path = download.cached_download(x_url)
    y_path = download.cached_download(y_url)

    with gzip.open(x_path, 'rb') as fx, gzip.open(y_path, 'rb') as fy:
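        # Each IDX file starts with a 4-byte magic number followed by the
        # item count as a big-endian int32.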
        fx.read(4)
        fy.read(4)
        N, = struct.unpack('>i', fx.read(4))
        if N != struct.unpack('>i', fy.read(4))[0]:
            raise RuntimeError('wrong pair of MNIST images and labels')
        fx.read(8)

        x = numpy.empty((N, 784), dtype=numpy.uint8)
        y = numpy.empty(N, dtype=numpy.uint8)

        for i in six.moves.range(N):
            y[i] = ord(fy.read(1))
            for j in six.moves.range(784):
                x[i, j] = ord(fx.read(1))

    numpy.savez_compressed(path, x=x, y=y)
    return {'x': x, 'y': y}
Example #25
def download_model(url, subdir_name=None, root_dir_name='ronekko'):
    root_dir_path = get_dataset_directory(root_dir_name)
    basename = os.path.basename(url)
    if subdir_name is None:
        subdir_name = ''
    save_dir_path = os.path.join(root_dir_path, subdir_name)
    save_file_path = os.path.join(save_dir_path, basename)

    if not os.path.exists(save_file_path):
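        # Download via the cache, then rename the cached file into its
        # permanent location.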
        cache_path = cached_download(url)
        if not os.path.exists(save_dir_path):
            os.mkdir(save_dir_path)
        os.rename(cache_path, save_file_path)
    return save_file_path
Example #26
def _make_npz(path, urls):
    x_url, y_url = urls
    x_path = download.cached_download(x_url)
    y_path = download.cached_download(y_url)

    with gzip.open(x_path, 'rb') as fx, gzip.open(y_path, 'rb') as fy:
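        # Each IDX file starts with a 4-byte magic number followed by the
        # item count as a big-endian int32.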
        fx.read(4)
        fy.read(4)
        N, = struct.unpack('>i', fx.read(4))
        if N != struct.unpack('>i', fy.read(4))[0]:
            raise RuntimeError('wrong pair of MNIST images and labels')
        fx.read(8)

        x = numpy.empty((N, 784), dtype=numpy.uint8)
        y = numpy.empty(N, dtype=numpy.uint8)

        for i in six.moves.range(N):
            y[i] = ord(fy.read(1))
            for j in six.moves.range(784):
                x[i, j] = ord(fx.read(1))

    numpy.savez_compressed(path, x=x, y=y)
    return {'x': x, 'y': y}
Example #27
    def creator(path):
        archive_path = download.cached_download(url)

        if name == 'cifar-10':
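            # CIFAR-10 ships as five pickled training batches of 10,000
            # images plus one test batch; each image is 3 x 32 x 32 = 3072
            # bytes.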
            train_x = numpy.empty((5, 10000, 3072), dtype=numpy.uint8)
            train_y = numpy.empty((5, 10000), dtype=numpy.uint8)
            test_y = numpy.empty(10000, dtype=numpy.uint8)

            dir_name = '{}-batches-py'.format(name)

            with tarfile.open(archive_path, 'r:gz') as archive:
                # training set
                for i in range(5):
                    file_name = '{}/data_batch_{}'.format(dir_name, i + 1)
                    d = _pickle_load(archive.extractfile(file_name))
                    train_x[i] = d['data']
                    train_y[i] = d['labels']

                # test set
                file_name = '{}/test_batch'.format(dir_name)
                d = _pickle_load(archive.extractfile(file_name))
                test_x = d['data']
                test_y[...] = d['labels']  # copy to array

            train_x = train_x.reshape(50000, 3072)
            train_y = train_y.reshape(50000)
        else:
            # name == 'cifar-100'
            def load(archive, file_name):
                d = _pickle_load(archive.extractfile(file_name))
                x = d['data'].reshape((-1, 3072))
                y = numpy.array(d['fine_labels'], dtype=numpy.uint8)
                return x, y

            with tarfile.open(archive_path, 'r:gz') as archive:
                train_x, train_y = load(archive, 'cifar-100-python/train')
                test_x, test_y = load(archive, 'cifar-100-python/test')

        numpy.savez_compressed(path,
                               train_x=train_x,
                               train_y=train_y,
                               test_x=test_x,
                               test_y=test_y)
        return {
            'train_x': train_x,
            'train_y': train_y,
            'test_x': test_x,
            'test_y': test_y
        }
Example #28
def download(dataset_type='normal'):
    if dataset_type == 'normal':
        url = 'https://github.com/brendenlake/omniglot/blob/master/python/images_background.zip'
    elif dataset_type == 'small1':
        url = 'https://github.com/brendenlake/omniglot/blob/master/python/images_background_small1.zip'
    elif dataset_type == 'small2':
        url = 'https://github.com/brendenlake/omniglot/blob/master/python/images_background_small2.zip'
    elif dataset_type == 'evaluation':
        url = 'https://github.com/brendenlake/omniglot/blob/master/python/images_evaluation.zip'
    else:
        raise ValueError(
            "dataset_type should be one of 'normal', 'small1', 'small2' or 'evaluation'")

    archive_path = dl.cached_download(url)
    with zipfile.ZipFile(archive_path, 'r') as archive:
        names = archive.namelist()
        for name in names:
            print(name)
Example #29
def _download_and_extract_tox21(config_name, save_filepath):
    is_successful = False
    c = _config[config_name]
    url = c['url']
    sdffile = c['filename']

    # Download tox21 dataset
    download_file_path = download.cached_download(url)

    # Extract zipfile to get sdffile
    with zipfile.ZipFile(download_file_path, 'r') as z:
        z.extract(sdffile)
        shutil.move(sdffile, save_filepath)

    is_successful = True
    return is_successful
Example #30
def download(dataset_type='normal'):
    if dataset_type == 'normal':
        url = 'https://github.com/brendenlake/omniglot/blob/master/python/images_background.zip'
    elif dataset_type == 'small1':
        url = 'https://github.com/brendenlake/omniglot/blob/master/python/images_background_small1.zip'
    elif dataset_type == 'small2':
        url = 'https://github.com/brendenlake/omniglot/blob/master/python/images_background_small2.zip'
    elif dataset_type == 'evaluation':
        url = 'https://github.com/brendenlake/omniglot/blob/master/python/images_evaluation.zip'
    else:
        raise ValueError(
            "dataset_type should be one of 'normal', 'small1', 'small2' or 'evaluation'")

    archive_path = dl.cached_download(url)
    with zipfile.ZipFile(archive_path, 'r') as archive:
        names = archive.namelist()
        for name in names:
            print(name)
Example #31
    def creator(path):

        def load(archive, file_name):
            d = _pickle_load(archive.extractfile(file_name))
            x = d['data'].reshape((-1, 3072))
            y = numpy.array(d['fine_labels'], dtype=numpy.uint8)
            return x, y

        archive_path = download.cached_download(url)
        with tarfile.open(archive_path, 'r:gz') as archive:
            train_x, train_y = load(archive, 'cifar-100-python/train')
            test_x, test_y = load(archive, 'cifar-100-python/test')

        numpy.savez_compressed(path, train_x=train_x, train_y=train_y,
                               test_x=test_x, test_y=test_y)
        return {'train_x': train_x, 'train_y': train_y,
                'test_x': test_x, 'test_y': test_y}
Example #32
def download_dataset(dataset_url, save_filepath):
    """Download and caches MoleculeNet Dataset

    Args:
        dataset_url (str): URL of dataset
        save_filepath (str): filepath for dataset

    Returns (bool): `True` if the download succeeds.

    """
    logger = getLogger(__name__)
    logger.warning('Downloading {} dataset, it takes time...'.format(
        dataset_url.split('/')[-1]))
    download_file_path = download.cached_download(dataset_url)
    shutil.move(download_file_path, save_filepath)
    # pandas can load gzipped or tarball csv file
    return True
Example #33
    def creator(path):

        def load(archive, file_name):
            d = _pickle_load(archive.extractfile(file_name))
            x = d['data'].reshape((-1, 3072))
            y = numpy.array(d['fine_labels'], dtype=numpy.uint8)
            return x, y

        archive_path = download.cached_download(url)
        with tarfile.open(archive_path, 'r:gz') as archive:
            train_x, train_y = load(archive, 'cifar-100-python/train')
            test_x, test_y = load(archive, 'cifar-100-python/test')

        numpy.savez_compressed(path, train_x=train_x, train_y=train_y,
                               test_x=test_x, test_y=test_y)
        return {'train_x': train_x, 'train_y': train_y,
                'test_x': test_x, 'test_y': test_y}
Example #34
def download():
	url = "http://people.csail.mit.edu/rgrosse/intrinsic/intrinsic-data.tar.gz"
	archive_path = dl.cached_download(url)
	with tarfile.open(archive_path, 'r:gz') as archive:
		names = archive.getnames()
		key_set = set()
		object_set = set()
		for path in names:
			if bool(re.search("png", path)):
				key = os.path.split(path)[1].split(".")[0]
				key_set.add(key)

				object_name = os.path.split(os.path.split(path)[0])[1]
				# print(object_name)
				object_set.add(object_name)

		print(key_set, object_set)
Example #35
def download_dataset(dataset_url, save_filepath):
    """Download and caches MoleculeNet Dataset

    Args:
        dataset_url (str): URL of dataset
        save_filepath (str): filepath for dataset

    Returns (bool): `True` if the download succeeds.

    """
    logger = getLogger(__name__)
    logger.warning('Downloading {} dataset, it takes time...'
                   .format(dataset_url.split('/')[-1]))
    download_file_path = download.cached_download(dataset_url)
    shutil.move(download_file_path, save_filepath)
    # pandas can load gzipped or tarball csv file
    return True
Example #36
def load_model(model_name, n_class):
    archs = {'nin': NIN, 'vgg16': VGG16BatchNormalization}
    model = archs[model_name](n_class=n_class)
    if model_name == 'nin':
        pass
    elif model_name == 'vgg16':
        rospack = rospkg.RosPack()
        model_path = osp.join(rospack.get_path('decopin_hand'), 'scripts',
                              'vgg16', 'VGG_ILSVRC_16_layers.npz')
        if not osp.exists(model_path):
            from chainer.dataset import download
            from chainer.links.caffe.caffe_function import CaffeFunction
            path_caffemodel = download.cached_download(
                'http://www.robots.ox.ac.uk/%7Evgg/software/very_deep/caffe/VGG_ILSVRC_19_layers.caffemodel'
            )
            caffemodel = CaffeFunction(path_caffemodel)
            npz.save_npz(model_path, caffemodel, compression=False)

        vgg16 = VGG16Layers(
            pretrained_model=model_path)  # original VGG16 model
        print('Load model from {}'.format(model_path))
        for l in model.children():
            if l.name.startswith('conv'):
                # l.disable_update()  # Comment-in for transfer learning, comment-out for fine tuning
                l1 = getattr(vgg16, l.name)
                l2 = getattr(model, l.name)
                assert l1.W.shape == l2.W.shape
                assert l1.b.shape == l2.b.shape
                l2.W.data[...] = l1.W.data[...]
                l2.b.data[...] = l1.b.data[...]
            elif l.name in ['fc6', 'fc7']:
                l1 = getattr(vgg16, l.name)
                l2 = getattr(model, l.name)
                assert l1.W.size == l2.W.size
                assert l1.b.size == l2.b.size
                l2.W.data[...] = l1.W.data.reshape(l2.W.shape)[...]
                l2.b.data[...] = l1.b.data.reshape(l2.b.shape)[...]
    else:
        print('Model type {} is invalid.'.format(model_name))
        exit()

    return model
Example #37
    def creator(path):
        archive_path = download.cached_download(url)

        if name == 'cifar-10':
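            # CIFAR-10 ships as five pickled training batches of 10,000
            # images plus one test batch; each image is 3 x 32 x 32 = 3072
            # bytes.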
            train_x = numpy.empty((5, 10000, 3072), dtype=numpy.uint8)
            train_y = numpy.empty((5, 10000), dtype=numpy.uint8)
            test_y = numpy.empty(10000, dtype=numpy.uint8)

            dir_name = '{}-batches-py'.format(name)

            with tarfile.open(archive_path, 'r:gz') as archive:
                # training set
                for i in range(5):
                    file_name = '{}/data_batch_{}'.format(dir_name, i + 1)
                    d = _pickle_load(archive.extractfile(file_name))
                    train_x[i] = d['data']
                    train_y[i] = d['labels']

                # test set
                file_name = '{}/test_batch'.format(dir_name)
                d = _pickle_load(archive.extractfile(file_name))
                test_x = d['data']
                test_y[...] = d['labels']  # copy to array

            train_x = train_x.reshape(50000, 3072)
            train_y = train_y.reshape(50000)
        else:
            # name == 'cifar-100'
            def load(archive, file_name):
                d = _pickle_load(archive.extractfile(file_name))
                x = d['data'].reshape((-1, 3072))
                y = numpy.array(d['fine_labels'], dtype=numpy.uint8)
                return x, y

            with tarfile.open(archive_path, 'r:gz') as archive:
                train_x, train_y = load(archive, 'cifar-100-python/train')
                test_x, test_y = load(archive, 'cifar-100-python/test')

        numpy.savez_compressed(path, train_x=train_x, train_y=train_y,
                               test_x=test_x, test_y=test_y)
        return {'train_x': train_x, 'train_y': train_y,
                'test_x': test_x, 'test_y': test_y}
    def creator(path):
        dump_path = download.cached_download(url)
        tmp_dir = tempfile.mkdtemp()

        # WikiExtractor needs .bz2 extension to function well
        dump_sym = os.path.join(tmp_dir, 'dump.bz2')
        os.symlink(dump_path, dump_sym)
        print "Extracting dump..."

        extract_dir = os.path.join(tmp_dir,'extracts')
        extract_dump(dump_sym, extract_dir, quiet=True)

        print "Building vocabulary and sequence array.."
        seq,voc = _build_dataset(extract_dir, path,max,th)

        # clean up temp file:
        print "Removing dump"
        shutil.rmtree(extract_dir)

        return seq, voc
Example #39
def get_atom_init_json_filepath(download_if_not_exist=True):
    """Construct a filepath which stores atom_init_json

    This method check whether the file exist or not,  and downloaded it if
    necessary.

    Args:
        download_if_not_exist (bool): If `True` download dataset
            if it is not downloaded yet.

    Returns (str): file path for atom_init_json
    """
    cache_root = download.get_dataset_directory(_root)
    cache_path = os.path.join(cache_root, file_name_atom_init_json)
    if not os.path.exists(cache_path) and download_if_not_exist:
        logger = getLogger(__name__)
        logger.info('Downloading atom_init.json...')
        download_file_path = download.cached_download(download_url)
        shutil.copy(download_file_path, cache_path)
    return cache_path
Example #40
    def creator(path):
        archive_path = download.cached_download(url)

        images = []
        labels = []

        with zipfile.ZipFile(archive_path, 'r') as archive:
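            # The 'base' subset contains 378 numbered image (.jpg) /
            # annotation (.png) pairs.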
            for i in range(1, 378 + 1):
                image_name = 'base/cmp_b{:04d}.jpg'.format(i)
                label_name = 'base/cmp_b{:04d}.png'.format(i)

                image = Image.open(io.BytesIO(archive.read(image_name)))
                image = np.asarray(image)
                images.append(image)
                label = Image.open(io.BytesIO(archive.read(label_name)))
                label = np.asarray(label)
                labels.append(label)

        np.savez_compressed(path, images=images, labels=labels)
        return {'images': images, 'labels': labels}
Example #41
def download():
    url = "http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/BSR/BSR_bsds500.tgz"
    archive_path = dl.cached_download(url)
    with tarfile.open(archive_path, 'r:gz') as archive:
        names = archive.getnames()

        cache_root = "./temp/"
        try:
            os.makedirs(cache_root)
        except OSError:
            if not os.path.isdir(cache_root):
                raise
        cache_path = tempfile.mkdtemp(dir=cache_root)

        data = []

        try:
            for name in names:
                if bool(re.search("BSR/BSDS500/data/images/train/",
                                  name)) and bool(re.search("jpg", name)):
                    path = cache_path
                    archive.extract(name, path)
                    img = imreadBGR(path + "/" + name)
                    lead = img.shape[0]
                    #for bsds500, imgs are of shape 481*321*3
                    if lead != 481:
                        img = img.transpose([1, 0, 2])

                    lead = img.shape[0]
                    if lead != 481:
                        raise ValueError(
                            "img shape not to be 481*321*3, but: {}".format(
                                img.shape))

                    data.append(img)
        finally:
            shutil.rmtree(cache_root)

        data = np.stack(data, axis=0)
        data = data.transpose([0, 3, 1, 2])
        return data
Example #42
def download():
	url = "http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/BSR/BSR_bsds500.tgz"
	archive_path = dl.cached_download(url)
	with tarfile.open(archive_path, 'r:gz') as archive:
		names = archive.getnames()

		cache_root = "./temp/" 
		try:
			os.makedirs(cache_root)
		except OSError:
			if not os.path.isdir(cache_root):
				raise
		cache_path = tempfile.mkdtemp(dir=cache_root)

		data = []

		try:
			for name in names:
				if bool(re.search("BSR/BSDS500/data/images/train/", name)) and bool(re.search("jpg", name)):
					path = cache_path
					archive.extract(name, path)
					img = imreadBGR(path+"/"+name)
					lead = img.shape[0]
					#for bsds500, imgs are of shape 481*321*3
					if lead!=481:
						img = img.transpose([1,0,2])

					lead = img.shape[0]
					if lead!=481:
						raise ValueError("img shape not to be 481*321*3, but: {}".format(img.shape))

					data.append(img)
		finally:
			shutil.rmtree(cache_root)

		data = np.stack(data, axis=0)
		data = data.transpose([0,3,1,2])
		return data
Example #43
def download(dataset_type='unprocessed'):
    if dataset_type == 'unprocessed':
        url = "http://www.cs.columbia.edu/CAVE/databases/SLAM_coil-20_coil-100/coil-20/coil-20-unproc.zip"
    elif dataset_type == 'processed':
        url = 'http://www.cs.columbia.edu/CAVE/databases/SLAM_coil-20_coil-100/coil-20/coil-20-proc.zip'
    else:
        raise ValueError(
            "dataset_type should be either unprocessed or processed")

    archive_path = dl.cached_download(url)
    fileOb = zipfile.ZipFile(archive_path, mode='r')
    names = fileOb.namelist()

    cache_root = "./temp/"
    try:
        os.makedirs(cache_root)
    except OSError:
        if not os.path.isdir(cache_root):
            raise
    cache_path = tempfile.mkdtemp(dir=cache_root)

    data, label = [], []

    try:
        for name in names:
            path = cache_path + name
            if bool(re.search('obj', name)):
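                # File names look like '.../objN__M.png'; the object index N
                # is used as the class label.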
                img = imreadBGR(fileOb.extract(name, path=path))
                data.append(img)
                label.append(int(name.split("__")[0].split("/obj")[1]))
    finally:
        shutil.rmtree(cache_root)

    data = np.stack(data, axis=0).transpose([0, 3, 1, 2])
    label = np.array(label).astype(np.uint8)
    data = np.reshape(data, (data.shape[0], -1))
    return data, label
Example #44
def download(dataset_type='unprocessed'):
	if dataset_type=='unprocessed':
		url = "http://www.cs.columbia.edu/CAVE/databases/SLAM_coil-20_coil-100/coil-20/coil-20-unproc.zip"
	elif dataset_type=='processed':
		url = 'http://www.cs.columbia.edu/CAVE/databases/SLAM_coil-20_coil-100/coil-20/coil-20-proc.zip'
	else:
		raise ValueError("dataset_type should be either unprocessed or processed")

	archive_path = dl.cached_download(url)
	fileOb = zipfile.ZipFile(archive_path, mode='r')
	names = fileOb.namelist()

	cache_root = "./temp/" 
	try:
		os.makedirs(cache_root)
	except OSError:
		if not os.path.isdir(cache_root):
			raise
	cache_path = tempfile.mkdtemp(dir=cache_root)

	data, label = [], []

	try:
		for name in names:
			path = cache_path + name
			if bool(re.search('obj', name)):
				img = imreadBGR(fileOb.extract(name, path=path))
				data.append(img)
				label.append(int(name.split("__")[0].split("/obj")[1]))
	finally:
		shutil.rmtree(cache_root)


	data = np.stack(data, axis=0).transpose([0, 3, 1, 2])
	label = np.array(label).astype(np.uint8)
	return data, label
Example #45
def _make_npz(path_npz, url, model):
    path_caffemodel = download.cached_download(url)
    print('Now loading caffemodel (usually it takes a few minutes)')
    GoogLeNet.convert_caffemodel_to_npz(path_caffemodel, path_npz)
    npz.load_npz(path_npz, model)
    return model
Example #46
def _make_npz(path_npz, url, model):
    path_caffemodel = download.cached_download(url)
    print('Now loading caffemodel (usually it takes a few minutes)')
    VGG16Layers.convert_caffemodel_to_npz(path_caffemodel, path_npz)
    npz.load_npz(path_npz, model)
    return model