예제 #1
0
    def __init__(self, param_path=None):
        """Load pretrained VGG19 parameters into the "VGG19" parameter scope.

        Args:
            param_path (str, optional): Path of the VGG19 ``.h5`` parameter
                file. When ``None``, the file name at the end of the download
                URL (``vgg19.h5``) is used, relative to the current directory.
                If the file does not exist, it is downloaded to this path.
        """
        url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/first-order-model/vgg19.h5"
        # NOTE: this method used to assert os.path.isfile(param_path) first,
        # which made the download fallback below unreachable and raised a
        # TypeError for the default ``param_path=None``.
        self.h5_file = url.split('/')[-1] if param_path is None else param_path
        if not os.path.exists(self.h5_file):
            print(
                "Pretrained VGG19 parameters not found. Downloading. Please wait..."
            )
            from nnabla.utils.data_source_loader import download
            parent_dir = os.path.dirname(self.h5_file)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            # Save to the same path that is loaded below; the trailing False
            # is kept from the original call (presumably ``open_file``).
            download(url, self.h5_file, False)

        with nn.parameter_scope("VGG19"):
            logger.info('loading vgg19 parameters...')
            nn.load_parameters(self.h5_file)
            # Drop the weights and biases of all affine (classifier) layers.
            drop_layers = [
                'classifier/0/affine', 'classifier/3/affine',
                'classifier/6/affine'
            ]
            for layer in drop_layers:
                nn.parameter.pop_parameter(layer + '/W')
                nn.parameter.pop_parameter(layer + '/b')
            # Per-channel constants reshaped to (1, 3, 1, 1).
            self.mean = nn.Variable.from_numpy_array(
                np.asarray([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1))
            self.std = nn.Variable.from_numpy_array(
                np.asarray([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1))
예제 #2
0
    def load_checkpoint(self, args):
        """Load pretrained parameters.

        Args:
            args (ArgumentParser): Parsed arguments. ``use_tf_weights`` selects
                the tensorflow-converted generator weights (downloaded to
                ``weights_path/gen_params.h5`` when missing); otherwise
                ``weights_path/params.h5`` is loaded on a best-effort basis.
                ``test`` only controls how a failed load is reported.
        """
        if args.use_tf_weights:
            tf_weights = os.path.join(args.weights_path, 'gen_params.h5')
            if not os.path.isfile(tf_weights):
                os.makedirs(args.weights_path, exist_ok=True)
                print(
                    "Downloading the pretrained tf-converted weights. Please wait..."
                )
                url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
                from nnabla.utils.data_source_loader import download
                download(url, tf_weights, False)
            nn.load_parameters(tf_weights)
            print('Loaded pretrained weights from tensorflow!')
            return

        try:
            nn.load_parameters(os.path.join(args.weights_path, 'params.h5'))
        except Exception:
            # Loading is best-effort, but KeyboardInterrupt/SystemExit must
            # still propagate, hence ``Exception`` and not a bare ``except``.
            if args.test:
                warnings.warn(
                    "Testing Model without pretrained weights!!!")
            else:
                print('No Pretrained weights loaded.')
예제 #3
0
    def load_checkpoint(self, args):
        """Load pretrained parameters.

        Args:
            args (ArgumentParser): Parsed arguments. Attributes read here:
                ``use_tf_weights``, ``weights_path``, ``pre_trained_model``,
                ``train`` and ``test``.

        With ``use_tf_weights`` the tensorflow-converted generator weights are
        loaded from ``weights_path/gen_params.h5`` (downloaded when missing).
        Otherwise parameters are loaded on a best-effort basis from
        ``pre_trained_model`` (a file, or a directory holding the ffhq-slim
        files) and then from ``weights_path`` if it is a directory.
        """
        if args.use_tf_weights:
            tf_weights = os.path.join(args.weights_path, 'gen_params.h5')
            if not os.path.isfile(tf_weights):
                os.makedirs(args.weights_path, exist_ok=True)
                print("Downloading the pretrained tf-converted weights. Please wait...")
                url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
                from nnabla.utils.data_source_loader import download
                download(url, tf_weights, False)
            nn.load_parameters(tf_weights)

            print('Loaded pretrained weights from tensorflow!')
            return

        try:
            pretrained = args.pre_trained_model
            if pretrained is not None:
                gen_path = os.path.join(
                    pretrained, 'ffhq-slim-gen-256-config-e.h5')
                disc_path = os.path.join(
                    pretrained, 'ffhq-slim-disc-256-config-e-corrected.h5')
                if os.path.isfile(pretrained):
                    nn.load_parameters(pretrained)
                elif os.path.isfile(gen_path):
                    if args.train:
                        # The same generator file initializes both scopes.
                        for scope in ('Generator', 'GeneratorEMA'):
                            with nn.parameter_scope(scope):
                                nn.load_parameters(gen_path)
                    else:
                        # The original loaded this file twice in a row; one
                        # load yields the same parameters.
                        nn.load_parameters(gen_path)
                    if os.path.isfile(disc_path):
                        nn.load_parameters(disc_path)
            if os.path.isdir(args.weights_path):
                scoped_files = (
                    ('Discriminator', 'disc_params.h5'),
                    ('Generator', 'gen_params.h5'),
                    ('GeneratorEMA', 'gen_ema_params.h5'),
                )
                for scope, file_name in scoped_files:
                    with nn.parameter_scope(scope):
                        nn.load_parameters(
                            os.path.join(args.weights_path, file_name))
        except Exception:
            # Loading is best-effort, but KeyboardInterrupt/SystemExit must
            # still propagate, hence ``Exception`` and not a bare ``except``.
            if args.test:
                warnings.warn(
                    "Testing Model without pretrained weights!!!")
            else:
                print('No Pretrained weights loaded.')
예제 #4
0
def download_provided_file(url, filepath=None, verbose=True):
    """Download ``url`` to ``filepath`` unless that path already exists.

    Args:
        url (str): Location of the file to fetch.
        filepath (str, optional): Destination path. Falls back to the base
            name of ``url`` when empty or ``None``.
        verbose (bool): Log a message before and after the download.
    """
    target = filepath or os.path.basename(url)
    if os.path.exists(target):
        # Already present: nothing to download and nothing to log.
        return
    if verbose:
        logger.info(f"{target} not found. Downloading...")
    download(url, target, False)
    if verbose:
        logger.info(f"Downloaded {target}.")
예제 #5
0
def load_imdb(vocab_size: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Download ``imdb.npz`` and return its train/test splits.

    Every word index that is not below ``vocab_size - 1`` is replaced by
    ``vocab_size - 1`` in the entries whose key contains ``'x'``.

    Args:
        vocab_size: Vocabulary size; its last index acts as the unknown token.

    Returns:
        The ``x_train``, ``x_test``, ``y_train`` and ``y_test`` entries.
    """
    archive_name = 'imdb.npz'
    download(f'https://s3.amazonaws.com/text-datasets/{archive_name}',
             open_file=False)
    # presumably download() caches under get_data_home() when no output
    # path is given; verify against its implementation
    npz_path = Path(get_data_home()) / archive_name

    oov_id = vocab_size - 1

    def _clip(tokens):
        # Map out-of-vocabulary indices onto the unknown-token index.
        return [tok if tok < oov_id else oov_id for tok in tokens]

    splits = {}
    for key, array in load_npy(npz_path).items():
        if 'x' in key:
            # Sentences are rewritten in place, one entry at a time.
            for pos, tokens in enumerate(array):
                array[pos] = _clip(tokens)
        splits[key] = array
    return splits['x_train'], splits['x_test'], splits['y_train'], splits['y_test']
예제 #6
0
    def __init__(self, train=True, shuffle=False, rng=None, output_dir=None):
        """Download and extract the STL10 binary archive, then load one split.

        Args:
            train (bool): Load the files whose name contains ``train_`` when
                True, otherwise those containing ``test_``.
            shuffle (bool): Forwarded to the base data source.
            rng (numpy.random.RandomState, optional): Random generator;
                defaults to ``RandomState(313)``.
            output_dir (str, optional): Directory the archive is downloaded
                to and extracted in. When ``None``, the directory of the
                downloaded file is used.
        """
        super(STL10DataSource, self).__init__(shuffle=shuffle, rng=rng)
        self._train = train
        data_uri = 'http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz'
        logger.info('Getting labeled data from {}'.format(data_uri))
        default_seed = 313
        output_file = get_filename_to_download(output_dir, data_uri)
        r = download(data_uri, output_file=output_file)  # file object returned
        try:
            print(r.name)
            if output_dir is None:
                # The original crashed in os.path.join(None, ...). Fall back
                # to where the archive was stored -- assumes r.name is the
                # path of the downloaded file; TODO confirm.
                output_dir = os.path.dirname(r.name)
            binary_dir = os.path.join(output_dir, "stl10_binary")
            # NOTE(review): extractall trusts the member paths of the archive.
            with tarfile.open(fileobj=r, mode="r:gz") as tar:
                tar.extractall(path=output_dir)
        finally:
            # The original wrote ``r.close`` without calling it, so the
            # handle was never released.
            r.close()

        wanted = 'train_' if train else 'test_'
        for member in os.listdir(binary_dir):
            if wanted not in member:
                continue
            print(member)
            self.load_image_and_labels(os.path.join(binary_dir, member))
        logger.info('Getting labeled data from {} done.'.format(data_uri))
        self._size = self._labels.size
        self._variables = ('x', 'y')
        if rng is None:
            rng = np.random.RandomState(default_seed)
        self.rng = rng
        self.reset()
예제 #7
0
    def __init__(self, width, height, padding, train=True, shuffle=False, rng=None):
        """Download Caltech101 and load all images with integer labels.

        Args:
            width, height, padding: Forwarded to ``self._resize_image`` for
                every image.
            train (bool): Accepted but not used by this constructor.
            shuffle (bool): Forwarded to the base data source.
            rng (numpy.random.RandomState, optional): Random generator;
                defaults to ``RandomState(313)``.
        """
        super(Caltech101DataSource, self).__init__(shuffle=shuffle, rng=rng)
        data_uri = "http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz"
        logger.info('Getting labeled data from {}.'.format(data_uri))
        r = download(data_uri)  # file object returned
        label_dict = dict()
        images = []
        labels = []
        try:
            with tarfile.open(fileobj=r, mode="r:gz") as fpin:
                for name in fpin.getnames():
                    # Only .jpg members are used; "Google" entries are skipped.
                    if ".jpg" not in name or "Google" in name:
                        continue
                    # The parent directory name is the class label; ids are
                    # assigned in order of first appearance.
                    label, filename = name.split("/")[-2:]
                    if label not in label_dict:
                        label_dict[label] = len(label_dict)
                    im = imread(fpin.extractfile(name), num_channels=3)
                    arranged_images = self._resize_image(
                        im, width, height, padding)
                    images.append(arranged_images)
                    labels.append(label_dict[label])
        finally:
            # Release the download handle even if reading the archive fails.
            r.close()
        self._images = np.array(images)
        self._labels = np.array(labels).reshape(-1, 1)
        logger.info('Getting labeled data from {} done.'.format(data_uri))

        self._size = self._labels.size
        self._variables = ('x', 'y')
        if rng is None:
            rng = np.random.RandomState(313)
        self.rng = rng
        self._indexes = rng.permutation(self._size)
예제 #8
0
    def __init__(self,
                 train=True,
                 shuffle=False,
                 rng=None,
                 label_shuffle=True,
                 label_shuffle_rate=0.1):
        """Download CIFAR-10 and load the training or test split.

        Args:
            train (bool): Load the ``data_batch`` members when True, the
                ``test_batch`` member otherwise.
            shuffle (bool): Forwarded to the base data source.
            rng (numpy.random.RandomState, optional): Random generator;
                defaults to ``RandomState(313)``.
            label_shuffle (bool): Training only. When True, stores
                ``label_shuffle_rate`` in ``self.shuffle_rate`` and calls
                ``self.label_shuffle()``; the untouched labels stay in
                ``self.raw_label``.
            label_shuffle_rate (float): Rate stored in ``self.shuffle_rate``.
        """
        super(Cifar10DataSource, self).__init__(shuffle=shuffle, rng=rng)

        self._train = train

        data_uri = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
        logger.info('Getting labeled data from {}.'.format(data_uri))
        r = download(data_uri)  # file object returned
        try:
            with tarfile.open(fileobj=r, mode="r:gz") as fpin:
                # Training data
                if train:
                    images = []
                    labels = []
                    for member in fpin.getmembers():
                        if "data_batch" not in member.name:
                            continue
                        fp = fpin.extractfile(member)
                        # NOTE(review): allow_pickle=True unpickles the
                        # downloaded data; only use with a trusted source.
                        data = np.load(fp, encoding="bytes", allow_pickle=True)
                        images.append(data[b"data"])
                        labels.append(data[b"labels"])
                    self._size = 50000
                    self._images = np.concatenate(images).reshape(
                        self._size, 3, 32, 32)

                    self._labels = np.concatenate(labels).reshape(-1, 1)
                    self.raw_label = self._labels.copy()
                    if label_shuffle:
                        self.shuffle_rate = label_shuffle_rate
                        self.label_shuffle()
                        print(f"{self.shuffle_rate*100}% of data was shuffled ")
                        # Number of labels that differ from the raw labels.
                        print(len(np.where(self._labels != self.raw_label)[0]))
                # Validation data
                else:
                    for member in fpin.getmembers():
                        if "test_batch" not in member.name:
                            continue
                        fp = fpin.extractfile(member)
                        # NOTE(review): see the allow_pickle remark above.
                        data = np.load(fp, encoding="bytes", allow_pickle=True)
                        images = data[b"data"]
                        labels = data[b"labels"]
                    self._size = 10000
                    self._images = images.reshape(self._size, 3, 32, 32)
                    self._labels = np.array(labels).reshape(-1, 1)
        finally:
            # Release the download handle even if reading the archive fails.
            r.close()
        logger.info('Getting labeled data from {} done.'.format(data_uri))

        self._size = self._labels.size
        self._variables = ('x', 'y')
        if rng is None:
            rng = np.random.RandomState(313)
        self.rng = rng
        self.reset()
예제 #9
0
def main():
    """Play an Atari environment greedily with a Q-network stored as NNP.

    When ``--nnp`` is not given, ``asset/<gym_env>/qnet.nnp`` is used and is
    downloaded first if ``find_local_nnp`` does not find it. The function
    then steps the validator in an endless loop.
    """
    args = get_args()

    nn.set_default_context(
        get_extension_context(args.extension, device_id=args.device_id))

    if args.nnp is None:
        local_nnp_dir = os.path.join("asset", args.gym_env)
        local_nnp_file = os.path.join(local_nnp_dir, "qnet.nnp")

        if not find_local_nnp(args.gym_env):
            logger.info("Downloading nnp data since you didn't specify...")
            # URLs always use '/'; os.path.join would insert the platform
            # separator (a backslash on Windows).
            nnp_uri = "/".join((
                "https://nnabla.org/pretrained-models/nnp_models/examples/dqn",
                args.gym_env, "qnet.nnp"))
            # makedirs also creates the parent "asset" directory if needed.
            os.makedirs(local_nnp_dir, exist_ok=True)
            download(nnp_uri, output_file=local_nnp_file, open_file=False)
            logger.info("Download done!")

        args.nnp = local_nnp_file

    from atari_utils import make_atari_deepmind
    env = make_atari_deepmind(args.gym_env, valid=False)
    print('Observation:', env.observation_space)
    print('Action:', env.action_space)
    obs_sampler = ObsSampler(args.num_frames)
    val_replay_memory = ReplayMemory(env.observation_space.shape,
                                     env.action_space.shape,
                                     max_memory=args.num_frames)
    # just play greedily
    explorer = GreedyExplorer(env.action_space.n,
                              use_nnp=True,
                              nnp_file=args.nnp,
                              name='qnet')
    validator = Validator(env,
                          val_replay_memory,
                          explorer,
                          obs_sampler,
                          num_episodes=1,
                          render=not args.no_render)
    # Runs until the process is interrupted.
    while True:
        validator.step()
예제 #10
0
    def __init__(self, train=True, shuffle=False, rng=None, output_dir=None):
        """Download the MNIST label and image files and decode them.

        Args:
            train (bool): Use the ``train-*`` files when True, the ``t10k-*``
                files otherwise.
            shuffle (bool): Forwarded to the base data source.
            rng (numpy.random.RandomState, optional): Random generator;
                defaults to ``RandomState(313)``.
            output_dir (str, optional): Passed to ``get_filename_to_download``
                to choose where the downloaded files are stored.
        """
        super(MnistDataSource, self).__init__(shuffle=shuffle)
        self._train = train
        if self._train:
            image_uri = 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz'
            label_uri = 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz'
        else:
            image_uri = 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz'
            label_uri = 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz'

        logger.info('Getting label data from {}.'.format(label_uri))
        label_output_file = get_filename_to_download(output_dir, label_uri)
        # file object returned
        r = download(label_uri, output_file=label_output_file)
        try:
            # wbits = MAX_WBITS | 32 auto-detects the gzip/zlib header.
            data = zlib.decompress(r.read(), zlib.MAX_WBITS | 32)
        finally:
            # Release the handle even if reading/decompressing fails.
            r.close()
        # Header: two big-endian uint32 values, then one byte per label.
        _, size = struct.unpack('>II', data[0:8])
        self._labels = numpy.frombuffer(data[8:], numpy.uint8).reshape(-1, 1)
        logger.info('Getting label data done.')

        logger.info('Getting image data from {}.'.format(image_uri))
        image_output_file = get_filename_to_download(output_dir, image_uri)
        r = download(image_uri, output_file=image_output_file)
        try:
            data = zlib.decompress(r.read(), zlib.MAX_WBITS | 32)
        finally:
            r.close()
        # Header: four big-endian uint32 values; the last three are used as
        # image count, height and width.
        _, size, height, width = struct.unpack('>IIII', data[0:16])
        self._images = numpy.frombuffer(data[16:], numpy.uint8).reshape(
            size, 1, height, width)
        logger.info('Getting image data done.')

        self._size = self._labels.size
        self._variables = ('x', 'y')
        if rng is None:
            rng = numpy.random.RandomState(313)
        self.rng = rng
        self.reset()
예제 #11
0
def load_mnist(train=True):
    '''
    Load MNIST dataset images and labels from the original page by Yann LeCun or the cache file.

    Args:
        train (bool): The testing dataset will be returned if False. Training data has 60000 images, while testing has 10000 images.

    Returns:
        numpy.ndarray: uint8 array of shape (#images, 1, height, width) holding the raw pixel bytes (not rescaled).
        numpy.ndarray: uint8 array of shape (#images, 1) holding the labels.

    '''
    if train:
        image_uri = 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz'
        label_uri = 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz'
    else:
        image_uri = 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz'
        label_uri = 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz'

    logger.info('Getting label data from {}.'.format(label_uri))
    r = download(label_uri)
    try:
        # wbits = MAX_WBITS | 32 auto-detects the gzip/zlib header.
        data = zlib.decompress(r.read(), zlib.MAX_WBITS | 32)
    finally:
        # Release the handle even if reading/decompressing fails.
        r.close()
    # Header: two big-endian uint32 values, then one byte per label.
    _, size = struct.unpack('>II', data[0:8])
    labels = numpy.frombuffer(data[8:], numpy.uint8).reshape(-1, 1)
    logger.info('Getting label data done.')

    logger.info('Getting image data from {}.'.format(image_uri))
    r = download(image_uri)
    try:
        data = zlib.decompress(r.read(), zlib.MAX_WBITS | 32)
    finally:
        r.close()
    # Header: four big-endian uint32 values; the last three are used as
    # image count, height and width.
    _, size, height, width = struct.unpack('>IIII', data[0:16])
    images = numpy.frombuffer(data[16:], numpy.uint8).reshape(
        size, 1, height, width)
    logger.info('Getting image data done.')

    return images, labels
def main():
    """Download the uncased_L-12_H-768_A-12 BERT archive and convert it.

    The archive is extracted into the current directory and the contained
    ``bert_model.ckpt`` is passed to ``convert`` together with the output
    file name ``nbla_bert_params.h5``.
    """
    archive = 'uncased_L-12_H-768_A-12.zip'
    # Only the file on disk is needed. The original kept the returned open
    # file object and never closed it.
    download(
        "https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip",
        archive, open_file=False)

    with zipfile.ZipFile(archive, "r") as zip_ref:
        zip_ref.extractall(".")
    input_ckpt_file = 'uncased_L-12_H-768_A-12/bert_model.ckpt'
    output_nnabla_file = 'nbla_bert_params.h5'
    convert(input_ckpt_file, output_nnabla_file)
예제 #13
0
파일: mnist_data.py 프로젝트: zwsong/nnabla
    def __init__(self, train=True, shuffle=False, rng=None):
        """Download the MNIST label and image files and decode them.

        Args:
            train (bool): Use the ``train-*`` files when True, the ``t10k-*``
                files otherwise.
            shuffle (bool): Forwarded to the base data source.
            rng (numpy.random.RandomState, optional): Random generator;
                defaults to ``RandomState(313)``.
        """
        super(MnistDataSource, self).__init__(shuffle=shuffle)
        self._train = train
        if self._train:
            image_uri = 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz'
            label_uri = 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz'
        else:
            image_uri = 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz'
            label_uri = 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz'

        logger.info('Getting label data from {}.'.format(label_uri))
        r = download(label_uri)
        try:
            # wbits = MAX_WBITS | 32 auto-detects the gzip/zlib header.
            data = zlib.decompress(r.read(), zlib.MAX_WBITS | 32)
        finally:
            # Release the handle even if reading/decompressing fails.
            r.close()
        # Header: two big-endian uint32 values, then one byte per label.
        _, size = struct.unpack('>II', data[0:8])
        self._labels = numpy.frombuffer(data[8:], numpy.uint8).reshape(-1, 1)
        logger.info('Getting label data done.')

        logger.info('Getting image data from {}.'.format(image_uri))
        r = download(image_uri)
        try:
            data = zlib.decompress(r.read(), zlib.MAX_WBITS | 32)
        finally:
            r.close()
        # Header: four big-endian uint32 values; the last three are used as
        # image count, height and width.
        _, size, height, width = struct.unpack('>IIII', data[0:16])
        self._images = numpy.frombuffer(data[16:], numpy.uint8).reshape(
            size, 1, height, width)
        logger.info('Getting image data done.')

        self._size = self._labels.size
        self._variables = ('x', 'y')
        if rng is None:
            rng = numpy.random.RandomState(313)
        self.rng = rng
        self.reset()
예제 #14
0
def load_cyclegan_dataset(dataset="horse2zebra",
                          train=True,
                          domain="A",
                          normalize_method=lambda x: (x - 127.5) / 127.5):
    '''
    Load CycleGAN dataset from `here <https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/>`_

    This function assumes that there are two domains in the dataset.

    Args:
        dataset (str): Dataset name excluding ".zip" extension, which you can find `here <https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/>`_.
        train (bool): Images are read from the "train<domain>" entries if True, from the "test<domain>" entries otherwise.
        domain (str): Domain name. It must be "A" or "B".
        normalize_method: Function of how to normalize an image. It is applied to each image after the transpose to channel-first layout.
    Returns:
        (np.ndarray, list): Images and filenames (base names without extension).

    '''
    assert domain in ["A", "B"]

    image_uri = 'https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/{}.zip'.format(
        dataset)
    logger.info('Getting {} data from {}.'.format(dataset, image_uri))
    r = download(image_uri)

    images = []
    filename_list = []
    dirname = "{}{}".format("train" if train else "test", domain)
    try:
        # Load unpaired images from zipfile.
        with zipfile.ZipFile(r, "r") as zf:
            for zipinfo in zf.infolist():
                filename = zipinfo.filename
                # Keep only .jpg entries of the requested split/domain.
                if dirname not in filename or ".jpg" not in filename:
                    continue
                with zf.open(filename, "r") as fp:
                    logger.info('loading {}'.format(filename))

                    # Load as RGB, then move channels first: (2, 0, 1).
                    image = scipy.misc.imread(fp, mode="RGB")
                    image = np.transpose(image, (2, 0, 1))
                    image = normalize_method(image)
                    image_name, ext = os.path.splitext(filename.split("/")[-1])
                    images.append(image)
                    filename_list.append(image_name)
    finally:
        # Release the download handle even if reading the archive fails.
        r.close()
    logger.info('Getting image data done.')
    return np.asarray(images), filename_list
예제 #15
0
def download_tiny_imagenet():
    """Download and extract Tiny ImageNet unless it is already present.

    The archive is extracted into ``get_data_home()`` only when the
    ``tiny-imagenet-200`` directory does not exist there yet.

    Returns:
        str: Path of the ``tiny-imagenet-200`` directory.
    """
    url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
    data_home = get_data_home()
    dir_data = os.path.join(data_home, 'tiny-imagenet-200')
    if not os.path.isdir(dir_data):
        f = download(url)
        try:
            logger.info('Extracting {} ...'.format(f.name))
            with zipfile.ZipFile(f) as z:
                # Extract member by member so tqdm can show progress.
                for member in tqdm(z.namelist()):
                    z.extract(member, data_home)
        finally:
            # Release the download handle even if extraction fails.
            f.close()
    return dir_data
예제 #16
0
    def __init__(self, train=True, shuffle=False, rng=None):
        """Download CIFAR-100 and load the training or test split.

        Args:
            train (bool): Load the member whose name contains ``train`` when
                True, the one containing ``test`` otherwise.
            shuffle (bool): Forwarded to the base data source.
            rng (numpy.random.RandomState, optional): Random generator;
                defaults to ``RandomState(313)``.
        """
        super(Cifar100DataSource, self).__init__(shuffle=shuffle, rng=rng)

        self._train = train
        data_uri = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
        logger.info('Getting labeled data from {}.'.format(data_uri))
        r = download(data_uri)  # file object returned
        # Both splits are read the same way; only the member name keyword
        # and the number of images differ.
        keyword = "train" if train else "test"
        self._size = 50000 if train else 10000
        images = None
        labels = None
        try:
            with tarfile.open(fileobj=r, mode="r:gz") as fpin:
                for member in fpin.getmembers():
                    if keyword not in member.name:
                        continue
                    fp = fpin.extractfile(member)
                    # NOTE(review): allow_pickle=True unpickles the
                    # downloaded data; only use with a trusted source.
                    data = np.load(fp, encoding="bytes", allow_pickle=True)
                    images = data[b"data"]
                    labels = data[b"fine_labels"]
        finally:
            # Release the download handle even if reading the archive fails.
            r.close()
        self._images = images.reshape(self._size, 3, 32, 32)
        self._labels = np.array(labels).reshape(-1, 1)
        logger.info('Getting labeled data from {} done.'.format(data_uri))

        self._size = self._labels.size
        self._variables = ('x', 'y')
        if rng is None:
            rng = np.random.RandomState(313)
        self.rng = rng
        self.reset()
예제 #17
0
def load_omniglot(test=False):
    """Download an Omniglot archive and load its images grouped by letter.

    Args:
        test (bool): Load ``images_evaluation.zip`` when True,
            ``images_background.zip`` otherwise.

    Returns:
        tuple: ``(x, y, lang_dict)`` where ``x`` stacks groups of 20 images,
        ``y`` holds the matching letter index repeated 20 times per group,
        and ``lang_dict`` maps an alphabet name to
        ``[first_letter_index, last_letter_index]``.
    """
    fname = "images_evaluation.zip" if test else "images_background.zip"
    r = download(OMNIGLOT_URL + fname)

    x = []
    y = []
    imgs = []
    lang_dict = {}
    n_letter = 0
    try:
        with zipfile.ZipFile(r, mode="r") as f:
            for path in f.namelist():

                # Four types of "path" is possible
                #  "image_xxx/"
                #  "image_xxx/Alphabet"
                #  "image_xxx/Alphabet/Letter"
                #  "image_xxx/Alphabet/Letter/img.png"

                names = path.split('/')
                if len(names) == 3:  # i.e. [images_xxx, Alphabet, None]
                    alphabet = names[1]
                    print("loading alphabet: " + alphabet)
                    lang_dict[alphabet] = [n_letter, None]
                # i.e. [images_xxx, Alphabet, Letter, Image]; compare by
                # value -- ``is not ''`` tested object identity.
                if len(names) == 4 and names[3] != '':
                    imgs.append(imageio.imread(f.read(path)))
                if len(imgs) == 20:  # Number of images are limited to 20
                    x.append(np.stack(imgs))
                    y.append(np.ones(20, ) * n_letter)
                    # Record this letter as the alphabet's latest index.
                    lang_dict[alphabet][1] = n_letter
                    n_letter += 1
                    imgs = []
    finally:
        # The original never closed the archive or the download handle.
        r.close()

    x = np.stack(x)
    y = np.stack(y)
    return x, y, lang_dict
예제 #18
0
def download_tiny_imagenet():
    """
        Downloads and unzips the tiny imagenet dataset.

        Nothing is downloaded when the ``datasets/tiny-imagenet-200``
        directory already exists.

        Returns:
                dir_data: the directory of the data containing the tiny imagenet
    """

    url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
    dir_data = os.path.join('datasets', 'tiny-imagenet-200')
    if not os.path.isdir(dir_data):
        f = download(url)
        try:
            logger.info('Extracting {} ...'.format(f.name))
            with zipfile.ZipFile(f) as z:
                # Extract member by member so tqdm can show progress.
                for member in tqdm(z.namelist()):
                    z.extract(member, 'datasets/')
        finally:
            # Release the download handle even if extraction fails.
            f.close()
    return dir_data
예제 #19
0
    def _load_data(url: str):
        """Download a text corpus and return it as lists of word indices.

        Newlines are replaced by the ``<eos>`` token. Unseen words are
        registered in the enclosing ``w2i`` / ``i2w`` mappings. Each returned
        sentence holds the indices up to, but excluding, an ``<eos>`` token;
        words after the last ``<eos>`` are not returned.
        """
        with download(url, open_file=True) as stream:
            text = stream.read().decode('utf-8').replace('\n', ' <eos> ')
            tokens = text.strip().split()

        token_ids = np.ndarray((len(tokens), ), dtype=np.int32)
        for pos, token in enumerate(tokens):
            # A new word gets the next free index (current vocabulary size).
            token_id = w2i.setdefault(token, len(w2i))
            i2w.setdefault(token_id, token)
            token_ids[pos] = token_id

        sentences = []
        current = []
        for token_id in token_ids:
            if i2w[token_id] == '<eos>':
                # Sentence boundary: store what was collected and restart.
                sentences.append(current)
                current = []
            else:
                current.append(token_id)
        return sentences
예제 #20
0
def load_pix2pix_dataset(dataset="edges2shoes", train=True, num_samples=-1):
    """Download a pix2pix dataset and split each image into its two halves.

    Args:
        dataset (str): Dataset name, inserted into the download URL.
        train (bool): Use the .jpg members whose name contains "train" when
            true, those containing "val" otherwise.
        num_samples (int): Stop after this many images; -1 loads all.

    Returns:
        tuple: ``(img_A, img_B, img_names)``. ``img_A`` / ``img_B`` are arrays
        of the left / right image halves in channel-first layout, and
        ``img_names`` are the member base names.
    """
    image_uri = 'https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/{}.tar.gz'\
                .format(dataset)
    logger.info('Getting {} data from {}.'.format(dataset, image_uri))
    r = download(image_uri)

    # Load concatenated images, then save separately
    # TODO: how to do for test
    img_A_list = []
    img_B_list = []
    img_names = []
    split_keyword = "train" if train else "val"
    try:
        with tarfile.open(fileobj=r, mode="r") as tar:
            cnt = 0
            for tinfo in tar.getmembers():
                if ".jpg" not in tinfo.name:
                    continue
                if split_keyword not in tinfo.name:
                    continue

                logger.info("Loading {} ...".format(tinfo.name))
                f = tar.extractfile(tinfo)
                img = scipy.misc.imread(f, mode="RGB")
                h, w, c = img.shape
                # Left half -> A, right half -> B; channels moved first.
                img_A = img[:, 0:w // 2, :].transpose((2, 0, 1))
                img_B = img[:, w // 2:, :].transpose((2, 0, 1))
                img_A_list.append(img_A)
                img_B_list.append(img_B)
                img_names.append(tinfo.name.split("/")[-1])
                cnt += 1

                if num_samples != -1 and cnt >= num_samples:
                    break
    finally:
        # Release the download handle even if reading the archive fails.
        r.close()
    logger.info('Getting image data done.')
    img_A, img_B = np.asarray(img_A_list), np.asarray(img_B_list)
    return img_A, img_B, img_names
예제 #21
0
def load_ptbset(ptbfile):
    """
    Load Penn Treebank Corpus

    Args:
        ptbfile: Location passed to ``download``.

    Returns:
        tuple: ``(itow, wtoi, dataset)`` -- index-to-word dict, word-to-index
        dict, and the corpus as a list of word indices. Indices are assigned
        in order of first appearance.
    """

    f = download(ptbfile)
    itow = {}  # index to word
    wtoi = {}  # word to index
    dataset = []

    try:
        # extract vocabularies from corpus
        for line in f:
            for w in line.split():
                # register the new word as an index number of first appearance
                if w not in wtoi:
                    i = len(wtoi)
                    wtoi[w] = i
                    itow[i] = w
                # translate words into numbers
                dataset.append(wtoi[w])
    finally:
        # The original never closed the downloaded file.
        f.close()

    return itow, wtoi, dataset
예제 #22
0
    def _load_data(self, type_name: str) -> List[List[int]]:
        """Download one corpus split and return it as lists of word indices.

        Newlines are replaced by ``<eos>``. Unseen words (and, when
        ``self.return_char_info`` is set, unseen characters) are registered
        in the instance vocabularies. Every sentence ends with the ``<eos>``
        index and starts with the ``<bos>`` index when ``self.with_bos`` is
        set; words after the last ``<eos>`` are not returned.
        """
        with download(self.ptb_url.format(type_name), open_file=True) as stream:
            text: str = stream.read().decode('utf-8').replace('\n', '<eos>')

        if self.return_char_info:
            for ch in set(text):
                char_id = self.c2i.setdefault(ch, len(self.c2i))
                self.i2c.setdefault(char_id, ch)

        tokens = text.strip().split()
        token_ids = np.ndarray((len(tokens), ), dtype=np.int32)
        for pos, token in enumerate(tokens):
            # A new word gets the next free index (current vocabulary size).
            token_id = self.w2i.setdefault(token, len(self.w2i))
            self.i2w.setdefault(token_id, token)
            token_ids[pos] = token_id

        def _start_sentence():
            # Each sentence optionally begins with the <bos> index.
            return [self.w2i['<bos>']] if self.with_bos else []

        sentences = []
        current = _start_sentence()
        for token_id in token_ids:
            current.append(token_id)
            if self.i2w[token_id] == '<eos>':
                sentences.append(current)
                current = _start_sentence()
        return sentences
예제 #23
0
def main():
    """Generate images with a pretrained StyleGAN2 generator.

    Parses the command line, downloads ``styleGAN2_G_params.h5`` into the
    working directory when it is missing, draws the style noise from the
    given seed(s), runs ``generate`` and writes one PNG per batch element
    into ``--output-dir``.

    Raises:
        AssertionError: if ``--mix-after`` is outside 1..16, or if
            ``--mixing`` is given without ``--seed-mix``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--output-filename',
                        '-o',
                        type=str,
                        default=None,
                        help="name of an output image file.")
    parser.add_argument('--output-dir',
                        '-d',
                        type=str,
                        default="results",
                        help="directory where the generated image is saved.")

    parser.add_argument('--seed',
                        type=int,
                        required=True,
                        help="seed for primal style noise.")
    parser.add_argument('--stochastic-seed',
                        type=int,
                        default=1,
                        help="seed for noises added to intermediate features.")

    parser.add_argument('--truncation-psi',
                        default=0.5,
                        type=float,
                        help="value for truncation trick.")

    parser.add_argument('--batch-size',
                        type=int,
                        default=1,
                        help="Number of images to generate.")

    parser.add_argument(
        '--mixing',
        action='store_true',
        help="if specified, apply style mixing with additional seed.")
    parser.add_argument('--seed-mix',
                        type=int,
                        default=None,
                        help="seed for another / secondary style noise.")
    parser.add_argument('--mix-after',
                        type=int,
                        default=7,
                        help="after this layer, style mixing is applied.")

    parser.add_argument('--context',
                        '-c',
                        type=str,
                        default="cudnn",
                        help="context. cudnn is recommended.")

    args = parser.parse_args()

    assert 0 < args.mix_after < 17, "specify --mix-after from 1 to 16."

    if not os.path.isfile("styleGAN2_G_params.h5"):
        print("Downloading the pretrained weight. Please wait...")
        url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
        from nnabla.utils.data_source_loader import download
        download(url, url.split('/')[-1], False)

    ctx = get_extension_context(args.context)
    nn.set_default_context(ctx)

    batch_size = args.batch_size
    num_layers = 18

    rnd = np.random.RandomState(args.seed)
    z = rnd.randn(batch_size, 512)

    print("Generation started...")
    print(f"truncation value: {args.truncation_psi}")
    print(f"seed for additional noise: {args.stochastic_seed}")

    # Inference via nn.NdArray utilizes significantly less memory

    if args.mixing:
        # apply style mixing
        # Compare against None explicitly: 0 is a valid seed and must not
        # be rejected by a truthiness check.
        assert args.seed_mix is not None, \
            "specify --seed-mix when --mixing is used."
        print(
            f"using style noise seed {args.seed} for layers 0-{args.mix_after - 1}"
        )
        # Layers are reported zero-based above, so the last one is
        # num_layers - 1.
        print(
            f"using style noise seed {args.seed_mix} for layers {args.mix_after}-{num_layers - 1}."
        )
        rnd = np.random.RandomState(args.seed_mix)
        z2 = rnd.randn(batch_size, 512)
        style_noises = [nn.NdArray.from_numpy_array(z)]
        style_noises += [nn.NdArray.from_numpy_array(z2)]
    else:
        # no style mixing (single noise / style is used)
        print(f"using style noise seed {args.seed} for entire layers.")
        style_noises = [nn.NdArray.from_numpy_array(z) for _ in range(2)]

    nn.set_auto_forward(True)
    nn.load_parameters("styleGAN2_G_params.h5")
    rgb_output = generate(batch_size, style_noises, args.stochastic_seed,
                          args.mix_after, args.truncation_psi)

    # convert to uint8 to save an image file
    image = convert_images_to_uint8(rgb_output, drange=[-1, 1])
    if args.output_filename is None:
        if not args.mixing:
            filename = f"seed{args.seed}"
        else:
            filename = f"seed{args.seed}_{args.seed_mix}"
    else:
        filename = args.output_filename

    os.makedirs(args.output_dir, exist_ok=True)

    # One file per batch element, suffixed with its index in the batch.
    for i in range(batch_size):
        filepath = os.path.join(args.output_dir, f'{filename}_{i}.png')
        imsave(filepath, image[i], channel_first=True)
        print(f"Genetation completed. Saved {filepath}.")
예제 #24
0
    nn.set_default_context(ctx)
"""
"""

# Hyper-parameters of the embedding experiment.
embedding_size: int = 2
batch_size: int = 1
max_epoch: int = 50
negative_sample_size = 10

file_url = 'https://raw.githubusercontent.com/qiangsiwei/poincare_embedding/master/data/mammal_subtree.tsv'

from functools import reduce
from itertools import chain
import operator
import random

# Every non-empty line of the TSV becomes one record: the list of its
# tab-separated fields.
with download(file_url, open_file=True) as f:
    lines: list = f.read().decode('utf-8').split('\n')
    pdata = [line.split('\t') for line in lines if line]

# Give every distinct field value an integer id. The records are flattened
# lazily; summing the lists with reduce(operator.add, ...) copies the growing
# list on every step and fails on an empty file.
pdict = {w: i for i, w in enumerate(set(chain.from_iterable(pdata)))}

vocab_size: int = len(pdict)
num_train_batch = len(pdata) // batch_size


def load_train_func(index):
    """Build the negative-sampling weights for the record at ``index``.

    NOTE(review): the body visible here ends without a ``return``, so as
    written the function returns ``None``. The snippet looks truncated;
    confirm against the full source.
    """
    # The record is unpacked into exactly two items, so it must have two fields.
    x, y = pdata[index]
    # Start with equal weight for every entry of pdict.
    negative_sample_prob = np.ones(len(pdict))
    # The two members of the record itself must never be drawn as negatives.
    negative_sample_prob[pdict[x]] = 0.0
    negative_sample_prob[pdict[y]] = 0.0
    # Normalize over the remaining entries; this sums to 1 only when x != y.
    negative_sample_prob /= len(pdict) - 2
예제 #25
0
def main():
    """Detect faces in the test image and predict their facial landmarks.

    Reads ``args.test_image``, detects faces with dlib (the CNN detector when
    the context is ``cudnn``, the frontal face detector otherwise), runs the
    FAN network on a crop around each face and, when
    ``args.landmarks_type_3D`` is set, appends a depth estimate from
    ResNetDepth. The collected landmarks are handed to ``visualize``.

    Returns:
        None. Returns early (after printing a warning) when no face is found.
    """
    args = get_args()
    # Get context
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)
    nn.set_auto_forward(True)

    image = io.imread(args.test_image)
    if image.ndim == 2:
        image = color.gray2rgb(image)
    elif image.shape[-1] == 4:
        # keep the first three channels only
        image = image[..., :3]

    if args.context == 'cudnn':
        if not os.path.isfile(args.cnn_face_detction_model):
            # The block of code below downloads the CNN-based face detection
            # model file provided by dlib and saves it in the directory where
            # this script is executed.
            print("Downloading the face detection CNN. Please wait...")
            url = "http://dlib.net/files/mmod_human_face_detector.dat.bz2"
            from nnabla.utils.data_source_loader import download
            archive_name = url.split('/')[-1]
            download(url, archive_name, False)
            # get the decompressed data; the context manager closes the
            # archive handle, which was previously leaked.
            with bz2.BZ2File(archive_name) as archive:
                data = archive.read()
            # write to dat file (archive name without the ".bz2" suffix).
            with open(archive_name[:-4], 'wb') as dat_file:
                dat_file.write(data)
        face_detector = dlib.cnn_face_detection_model_v1(
            args.cnn_face_detction_model)
        detected_faces = face_detector(
            cv2.cvtColor(image[..., ::-1].copy(), cv2.COLOR_BGR2GRAY))
        detected_faces = [[
            d.rect.left(),
            d.rect.top(),
            d.rect.right(),
            d.rect.bottom()
        ] for d in detected_faces]
    else:
        face_detector = dlib.get_frontal_face_detector()
        detected_faces = face_detector(
            cv2.cvtColor(image[..., ::-1].copy(), cv2.COLOR_BGR2GRAY))
        detected_faces = [[d.left(), d.top(),
                           d.right(), d.bottom()] for d in detected_faces]

    if len(detected_faces) == 0:
        print("Warning: No faces were detected.")
        return None

    # Load FAN weights
    with nn.parameter_scope("FAN"):
        print("Loading FAN weights...")
        nn.load_parameters(args.model)

    # Load ResNetDepth weights
    if args.landmarks_type_3D:
        with nn.parameter_scope("ResNetDepth"):
            print("Loading ResNetDepth weights...")
            nn.load_parameters(args.resnet_depth_model)

    landmarks = []
    # Each d is [left, top, right, bottom] of one detected face.
    for d in detected_faces:
        # Box centre, shifted upwards by 12% of the box height.
        center = [d[2] - (d[2] - d[0]) / 2.0, d[3] - (d[3] - d[1]) / 2.0]
        center[1] = center[1] - (d[3] - d[1]) * 0.12
        scale = (d[2] - d[0] + d[3] - d[1]) / args.reference_scale
        inp = crop(image, center, scale)
        inp = nn.Variable.from_numpy_array(inp.transpose((2, 0, 1)))
        inp = F.reshape(F.mul_scalar(inp, 1 / 255.0), (1, ) + inp.shape)
        with nn.parameter_scope("FAN"):
            out = fan(inp, args.network_size)[-1]
        pts, pts_img = get_preds_fromhm(out, center, scale)
        pts, pts_img = F.reshape(pts, (68, 2)) * \
            4, F.reshape(pts_img, (68, 2))

        if args.landmarks_type_3D:
            heatmaps = np.zeros((68, 256, 256), dtype=np.float32)
            # A separate index name keeps this loop from clobbering any
            # outer loop variable.
            for k in range(68):
                if pts.d[k, 0] > 0:
                    heatmaps[k] = draw_gaussian(heatmaps[k], pts.d[k], 2)
            heatmaps = nn.Variable.from_numpy_array(heatmaps)
            heatmaps = F.reshape(heatmaps, (1, ) + heatmaps.shape)
            with nn.parameter_scope("ResNetDepth"):
                depth_pred = F.reshape(
                    resnet_depth(F.concatenate(inp, heatmaps, axis=1)),
                    (68, 1))
            pts_img = F.concatenate(pts_img,
                                    depth_pred * (1.0 / (256.0 /
                                                         (200.0 * scale))),
                                    axis=1)

        landmarks.append(pts_img.d)
    visualize(landmarks, image, args.output)
예제 #26
0
    def __init__(self, train=True, shuffle=False, rng=None):
        """Download CIFAR-100 and load one split into memory.

        A lock file in the nnabla data home serialises the download between
        processes (e.g. those spawned by mpirun). The lock is released even
        when loading fails, so a crash does not block later runs.

        Args:
            train (bool): Load the training split when True, otherwise the
                test split.
            shuffle (bool): Forwarded to the base class constructor.
            rng: Random number generator stored on ``self.rng``. Defaults to
                ``np.random.RandomState(313)``.

        Raises:
            Exception: if the lock could not be acquired within 30 minutes.
        """
        super(Cifar100DataSource, self).__init__(shuffle=shuffle)

        # Lock
        lockfile = os.path.join(get_data_home(), "cifar100.lock")
        start_time = time.time()
        while True:  # busy-lock due to communication between process spawn by mpirun
            try:
                fd = os.open(lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR)
                break
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
                if (time.time() - start_time) >= 60 * 30:  # wait for 30min
                    raise Exception(
                        "Timeout occured. If there are cifar100.lock in $HOME/nnabla_data, it should be deleted."
                    )

            time.sleep(5)

        try:
            self._train = train
            data_uri = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
            logger.info('Getting labeled data from {}.'.format(data_uri))
            r = download(data_uri)  # file object returned
            try:
                with tarfile.open(fileobj=r, mode="r:gz") as fpin:
                    # Training data
                    if train:
                        images = []
                        labels = []
                        for member in fpin.getmembers():
                            if "train" not in member.name:
                                continue
                            fp = fpin.extractfile(member)
                            # The archive members are pickles; NumPy refuses
                            # to unpickle unless allow_pickle is set.
                            data = np.load(fp,
                                           encoding="bytes",
                                           allow_pickle=True)
                            images = data[b"data"]
                            labels = data[b"fine_labels"]
                        self._size = 50000
                        self._images = images.reshape(self._size, 3, 32, 32)
                        self._labels = np.array(labels).reshape(-1, 1)
                    # Validation data
                    else:
                        for member in fpin.getmembers():
                            if "test" not in member.name:
                                continue
                            fp = fpin.extractfile(member)
                            data = np.load(fp,
                                           encoding="bytes",
                                           allow_pickle=True)
                            images = data[b"data"]
                            labels = data[b"fine_labels"]
                        self._size = 10000
                        self._images = images.reshape(self._size, 3, 32, 32)
                        self._labels = np.array(labels).reshape(-1, 1)
            finally:
                # Close the download handle even if extraction fails.
                r.close()
            logger.info('Getting labeled data from {} done.'.format(data_uri))

            self._size = self._labels.size
            self._variables = ('x', 'y')
            if rng is None:
                rng = np.random.RandomState(313)
            self.rng = rng
            self.reset()
        finally:
            # Unlock
            os.close(fd)
            os.unlink(lockfile)
예제 #27
0
def generate_attribute_direction(args, attribute_prediction_model):
    """Estimate a latent direction for an attribute and save it to disk.

    Generates ``args.num_images`` images in batches, classifies each with
    ``attribute_prediction_model``, and sums the latent codes of images
    predicted as class 0 and as class 1 with confidence above 0.65. The
    difference of the two class means is saved as ``direction.npy`` under the
    first path component of ``args.classifier_weight_path``.

    Args:
        args: Parsed options; reads ``weights_path``,
            ``classifier_weight_path``, ``batch_size`` and ``num_images``.
        attribute_prediction_model: Callable applied to the generated images
            to obtain class scores.
    """
    gen_params_path = os.path.join(args.weights_path, 'gen_params.h5')

    if not os.path.isfile(gen_params_path):
        os.makedirs(args.weights_path, exist_ok=True)
        print(
            "Downloading the pretrained tf-converted weights. Please wait...")
        url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
        from nnabla.utils.data_source_loader import download
        download(url, os.path.join(args.weights_path, 'gen_params.h5'), False)

    nn.load_parameters(os.path.join(args.weights_path, 'gen_params.h5'))
    print('Loaded pretrained weights from tensorflow!')

    nn.load_parameters(args.classifier_weight_path)
    print(f'Loaded {args.classifier_weight_path}')

    # Full-sized batches followed by one smaller batch for the remainder.
    full_batches, remainder = divmod(args.num_images, args.batch_size)
    batches = [args.batch_size] * full_batches
    if remainder != 0:
        batches.append(remainder)

    w_plus, w_minus = 0.0, 0.0
    w_plus_count, w_minus_count = 0.0, 0.0
    pbar = trange(len(batches))
    for step in pbar:
        batch_size = batches[step]

        # The same noise is used for both style inputs.
        noise = F.randn(shape=(batch_size, 512)).data
        z = [noise, noise]

        # Divide each latent by the root of its mean square (plus epsilon).
        for k, latent in enumerate(z):
            mean_square = F.mean(latent**2., axis=1, keepdims=True)
            root = F.pow_scalar(F.add_scalar(mean_square, 1e-8),
                                0.5,
                                inplace=True)
            z[k] = F.div2(latent, root)

        # get latent code
        w = [
            mapping_network(z[0], outmaps=512, num_layers=8),
            mapping_network(z[1], outmaps=512, num_layers=8),
        ]

        # truncation trick
        dlatent_avg = nn.parameter.get_parameter_or_create(name="dlatent_avg",
                                                           shape=(1, 512))
        w = [lerp(dlatent_avg, latent_code, 0.7) for latent_code in w]

        constant_bc = nn.parameter.get_parameter_or_create(
            name="G_synthesis/4x4/Const/const", shape=(1, 512, 4, 4))
        constant_bc = F.broadcast(constant_bc,
                                  (batch_size, ) + constant_bc.shape[1:])

        gen = synthesis(w, constant_bc, noise_seed=100, mix_after=7)

        classifier_score = F.softmax(attribute_prediction_model(gen, True))
        confidence, class_pred = F.max(classifier_score,
                                       axis=1,
                                       with_index=True,
                                       keepdims=True)

        # Per-sample masks: predicted class and "confident enough".
        is_class0 = class_pred.data == 0
        is_class1 = class_pred.data == 1
        confident = confidence.data > 0.65

        w_plus += np.sum(w[0].data * is_class0 * confident,
                         axis=0,
                         keepdims=True)
        w_minus += np.sum(w[0].data * is_class1 * confident,
                          axis=0,
                          keepdims=True)

        w_plus_count += np.sum(is_class0 * confident)
        w_minus_count += np.sum(is_class1 * confident)

        pbar.set_description(f'{w_plus_count} {w_minus_count}')

    # save attribute direction
    mean_plus = w_plus / w_plus_count
    mean_minus = w_minus / w_minus_count
    attribute_variation_direction = mean_plus - mean_minus
    print(w_plus_count, w_minus_count)
    np.save(f'{args.classifier_weight_path.split("/")[0]}/direction.npy',
            attribute_variation_direction)
예제 #28
0
def generate_data(args):
    """Generate image triplets with StyleGAN2 and save them as PNG files.

    For every sample three images are written into ``args.save_image_path``:
    ``image_<n>.png`` (from the unmodified latents), ``image_<n>_o.png`` and
    ``image_<n>_y.png``. Without ``args.face_morph`` the latter two come from
    moving the first latent by ``+/- args.coeff`` along the direction loaded
    from ``args.attr_delta_path``; with it they use only the first or only
    the second latent respectively.

    Args:
        args: Parsed options; reads ``weights_path``, ``save_image_path``,
            ``batch_size``, ``num_images``, ``face_morph``,
            ``attr_delta_path`` and ``coeff``.
    """

    if not os.path.isfile(os.path.join(args.weights_path, 'gen_params.h5')):
        os.makedirs(args.weights_path, exist_ok=True)
        print(
            "Downloading the pretrained tf-converted weights. Please wait...")
        url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
        from nnabla.utils.data_source_loader import download
        download(url, os.path.join(args.weights_path, 'gen_params.h5'), False)

    nn.load_parameters(os.path.join(args.weights_path, 'gen_params.h5'))
    print('Loaded pretrained weights from tensorflow!')

    os.makedirs(args.save_image_path, exist_ok=True)

    # Full-sized batches followed by one smaller batch for the remainder.
    batches = [
        args.batch_size for _ in range(args.num_images // args.batch_size)
    ]
    if args.num_images % args.batch_size != 0:
        batches.append(args.num_images -
                       (args.num_images // args.batch_size) * args.batch_size)

    # Number of samples written by earlier batches. Deriving the file index
    # from the current batch size would make a smaller final batch reuse, and
    # overwrite, indices of previous batches.
    image_offset = 0
    for batch_size in batches:
        z = [
            F.randn(shape=(batch_size, 512)).data,
            F.randn(shape=(batch_size, 512)).data
        ]

        # Divide each latent by the root of its mean square (plus epsilon).
        for i in range(len(z)):
            z[i] = F.div2(
                z[i],
                F.pow_scalar(F.add_scalar(
                    F.mean(z[i]**2., axis=1, keepdims=True), 1e-8),
                             0.5,
                             inplace=True))

        # get latent code
        w = [mapping_network(z[0], outmaps=512, num_layers=8)]
        w += [mapping_network(z[1], outmaps=512, num_layers=8)]

        # truncation trick
        dlatent_avg = nn.parameter.get_parameter_or_create(name="dlatent_avg",
                                                           shape=(1, 512))
        w = [lerp(dlatent_avg, _, 0.7) for _ in w]

        # Load direction
        if not args.face_morph:
            attr_delta = nn.NdArray.from_numpy_array(
                np.load(args.attr_delta_path))
            attr_delta = F.reshape(attr_delta[0], (1, -1))
            w_plus = [w[0] + args.coeff * attr_delta, w[1]]
            w_minus = [w[0] - args.coeff * attr_delta, w[1]]
        else:
            w_plus = [w[0], w[0]]  # content
            w_minus = [w[1], w[1]]  # style

        constant_bc = nn.parameter.get_parameter_or_create(
            name="G_synthesis/4x4/Const/const", shape=(1, 512, 4, 4))
        constant_bc = F.broadcast(constant_bc,
                                  (batch_size, ) + constant_bc.shape[1:])

        gen_plus = synthesis(w_plus, constant_bc, noise_seed=100, mix_after=8)
        gen_minus = synthesis(w_minus,
                              constant_bc,
                              noise_seed=100,
                              mix_after=8)
        gen = synthesis(w, constant_bc, noise_seed=100, mix_after=8)

        image_plus = convert_images_to_uint8(gen_plus, drange=[-1, 1])
        image_minus = convert_images_to_uint8(gen_minus, drange=[-1, 1])
        image = convert_images_to_uint8(gen, drange=[-1, 1])

        for j in range(batch_size):
            filepath = os.path.join(args.save_image_path,
                                    f'image_{image_offset + j}')
            imsave(f'{filepath}_o.png', image_plus[j], channel_first=True)
            imsave(f'{filepath}_y.png', image_minus[j], channel_first=True)
            imsave(f'{filepath}.png', image[j], channel_first=True)
            print(f"Genetated. Saved {filepath}")

        image_offset += batch_size
예제 #29
0
    def __init__(self, train=True, shuffle=False, rng=None):
        """Download CIFAR-10 and load one split into memory.

        A lock file in the nnabla data home serialises the download between
        processes (e.g. those spawned by mpirun). The lock is released even
        when loading fails, so a crash does not block later runs.

        Args:
            train (bool): Load the training batches when True, otherwise the
                test batch.
            shuffle (bool): Forwarded to the base class constructor.
            rng: Random number generator stored on ``self.rng``. Defaults to
                ``np.random.RandomState(313)``.

        Raises:
            Exception: if the lock could not be acquired within 30 minutes.
        """
        super(Cifar10DataSource, self).__init__(shuffle=shuffle)

        # Lock
        lockfile = os.path.join(get_data_home(), "cifar10.lock")
        start_time = time.time()
        while True:  # busy-lock due to communication between process spawn by mpirun
            try:
                fd = os.open(lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR)
                break
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
                if (time.time() - start_time) >= 60 * 30:  # wait for 30min
                    raise Exception(
                        "Timeout occured. If there are cifar10.lock in $HOME/nnabla_data, it should be deleted.")

            time.sleep(5)

        try:
            self._train = train
            data_uri = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
            logger.info('Getting labeled data from {}.'.format(data_uri))
            r = download(data_uri)  # file object returned
            try:
                with tarfile.open(fileobj=r, mode="r:gz") as fpin:
                    # Training data
                    if train:
                        images = []
                        labels = []
                        for member in fpin.getmembers():
                            if "data_batch" not in member.name:
                                continue
                            fp = fpin.extractfile(member)
                            # The archive members are pickles; NumPy refuses
                            # to unpickle unless allow_pickle is set.
                            data = np.load(fp,
                                           encoding="bytes",
                                           allow_pickle=True)
                            images.append(data[b"data"])
                            labels.append(data[b"labels"])
                        self._size = 50000
                        self._images = np.concatenate(
                            images).reshape(self._size, 3, 32, 32)
                        self._labels = np.concatenate(labels).reshape(-1, 1)
                    # Validation data
                    else:
                        for member in fpin.getmembers():
                            if "test_batch" not in member.name:
                                continue
                            fp = fpin.extractfile(member)
                            data = np.load(fp,
                                           encoding="bytes",
                                           allow_pickle=True)
                            images = data[b"data"]
                            labels = data[b"labels"]
                        self._size = 10000
                        self._images = images.reshape(self._size, 3, 32, 32)
                        self._labels = np.array(labels).reshape(-1, 1)
            finally:
                # Close the download handle even if extraction fails.
                r.close()
            # Completion message, distinct from the start message above.
            logger.info('Getting labeled data from {} done.'.format(data_uri))

            self._size = self._labels.size
            self._variables = ('x', 'y')
            if rng is None:
                rng = np.random.RandomState(313)
            self.rng = rng
            self.reset()
        finally:
            # Unlock
            os.close(fd)
            os.unlink(lockfile)