def __init__(self, param_path=None):
    """Load pretrained VGG19 parameters and prepare input normalization constants.

    Args:
        param_path (str, optional): Path of the VGG19 ``.h5`` parameter file.
            When omitted, the file name at the end of the download URL
            (``vgg19.h5`` in the current directory) is used. If the file
            does not exist it is downloaded to this path first.
    """
    url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/first-order-model/vgg19.h5"
    # The former `assert os.path.isfile(param_path)` ran before the download
    # fallback, which made that fallback unreachable and raised TypeError
    # for the default param_path=None.
    self.h5_file = param_path if param_path is not None else url.split('/')[-1]
    if not os.path.exists(self.h5_file):
        print(
            "Pretrained VGG19 parameters not found. Downloading. Please wait..."
        )
        from nnabla.utils.data_source_loader import download
        target_dir = os.path.dirname(self.h5_file)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        # Save to the very path that is loaded below.
        download(url, self.h5_file, False)
    with nn.parameter_scope("VGG19"):
        logger.info('loading vgg19 parameters...')
        nn.load_parameters(self.h5_file)
        # drop all the affine layers.
        drop_layers = [
            'classifier/0/affine', 'classifier/3/affine',
            'classifier/6/affine'
        ]
        for layers in drop_layers:
            nn.parameter.pop_parameter(layers + '/W')
            nn.parameter.pop_parameter(layers + '/b')
    # Per-channel constants, shaped (1, 3, 1, 1).
    self.mean = nn.Variable.from_numpy_array(
        np.asarray([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1))
    self.std = nn.Variable.from_numpy_array(
        np.asarray([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1))
def load_checkpoint(self, args):
    """Load pretrained parameters.

    Args:
        args (ArgumentParser): Parsed arguments. ``use_tf_weights`` selects
            the tensorflow-converted generator weights (downloaded to
            ``weights_path/gen_params.h5`` when missing); otherwise
            ``weights_path/params.h5`` is loaded. ``test`` decides whether a
            failed load is reported as a warning or as a plain message.
    """
    if args.use_tf_weights:
        gen_params_path = os.path.join(args.weights_path, 'gen_params.h5')
        if not os.path.isfile(gen_params_path):
            os.makedirs(args.weights_path, exist_ok=True)
            print(
                "Downloading the pretrained tf-converted weights. Please wait..."
            )
            url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
            from nnabla.utils.data_source_loader import download
            download(url, gen_params_path, False)
        nn.load_parameters(gen_params_path)
        print('Loaded pretrained weights from tensorflow!')
    else:
        try:
            nn.load_parameters(os.path.join(args.weights_path, 'params.h5'))
        except Exception:
            # Loading stays best-effort, but KeyboardInterrupt / SystemExit
            # are no longer swallowed as they were by the bare `except:`.
            if args.test:
                warnings.warn(
                    "Testing Model without pretrained weights!!!")
            else:
                print('No Pretrained weights loaded.')
# load_checkpoint: restore network parameters before training or testing.
#
# * With args.use_tf_weights, `gen_params.h5` under args.weights_path is
#   loaded; the tf-converted StyleGAN2 generator weights are downloaded to
#   that path first when the file is missing.
# * Otherwise parameters are read from args.pre_trained_model (either a
#   parameter file, or a directory holding the `ffhq-slim-gen-256-config-e.h5`
#   / `ffhq-slim-disc-256-config-e-corrected.h5` files) and, when
#   args.weights_path is a directory, from `disc_params.h5`, `gen_params.h5`
#   and `gen_ema_params.h5` under the 'Discriminator', 'Generator' and
#   'GeneratorEMA' parameter scopes.
#
# NOTE(review): the bare `except:` swallows every error raised while loading
#   (including KeyboardInterrupt); only a warning (args.test) or a printed
#   message remains.
# NOTE(review): the ffhq-slim generator file is passed to two consecutive
#   nn.load_parameters calls after the `else:` -- confirm whether the second
#   call is intentional and at which nesting level it belongs.
# NOTE(review): the docstring mentions solver states, but only
#   nn.load_parameters is called here.
def load_checkpoint(self, args): """Load pretrained parameters and solver states Args: args (ArgumentParser): To check if tensorflow trained weights are to be used for testing and to get the path of the folder from where the parameter and solver states are to be loaded """ if args.use_tf_weights: if not os.path.isfile(os.path.join(args.weights_path, 'gen_params.h5')): os.makedirs(args.weights_path, exist_ok=True) print("Downloading the pretrained tf-converted weights. Please wait...") url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5" from nnabla.utils.data_source_loader import download download(url, os.path.join( args.weights_path, 'gen_params.h5'), False) nn.load_parameters(os.path.join( args.weights_path, 'gen_params.h5')) print('Loaded pretrained weights from tensorflow!') else: try: if args.pre_trained_model is not None: if os.path.isfile(args.pre_trained_model): nn.load_parameters(args.pre_trained_model) elif os.path.isfile(os.path.join(args.pre_trained_model, 'ffhq-slim-gen-256-config-e.h5')): if args.train: with nn.parameter_scope('Generator'): nn.load_parameters(os.path.join( args.pre_trained_model, 'ffhq-slim-gen-256-config-e.h5')) with nn.parameter_scope('GeneratorEMA'): nn.load_parameters(os.path.join( args.pre_trained_model, 'ffhq-slim-gen-256-config-e.h5')) else: nn.load_parameters(os.path.join( args.pre_trained_model, 'ffhq-slim-gen-256-config-e.h5')) nn.load_parameters(os.path.join( args.pre_trained_model, 'ffhq-slim-gen-256-config-e.h5')) if os.path.isfile(os.path.join(args.pre_trained_model, 'ffhq-slim-disc-256-config-e-corrected.h5')): nn.load_parameters(os.path.join( args.pre_trained_model, 'ffhq-slim-disc-256-config-e-corrected.h5')) if os.path.isdir(args.weights_path): with nn.parameter_scope('Discriminator'): nn.load_parameters(os.path.join( args.weights_path, 'disc_params.h5')) with nn.parameter_scope('Generator'): nn.load_parameters(os.path.join( args.weights_path, 'gen_params.h5')) with 
nn.parameter_scope('GeneratorEMA'): nn.load_parameters(os.path.join( args.weights_path, 'gen_ema_params.h5')) except: if args.test: warnings.warn( "Testing Model without pretrained weights!!!") else: print('No Pretrained weights loaded.')
def download_provided_file(url, filepath=None, verbose=True):
    """Fetch `url` to `filepath` unless a file already exists there.

    Args:
        url (str): Location of the file to download.
        filepath (str, optional): Destination path. Falls back to the last
            path component of `url` when empty or None.
        verbose (bool): Log a message before and after the download.
    """
    target = filepath or os.path.basename(url)
    if os.path.exists(target):
        # Nothing to do: the file is already available locally.
        return
    if verbose:
        logger.info(f"{target} not found. Downloading...")
    download(url, target, False)
    if verbose:
        logger.info(f"Downloaded {target}.")
    return
def load_imdb(vocab_size: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Download the IMDB archive and clip word indices to the vocabulary size.

    Every word index that is not below ``vocab_size - 1`` in the ``x`` arrays
    is replaced by ``vocab_size - 1`` (the unknown-word index).

    Args:
        vocab_size (int): Size of the vocabulary to keep.

    Returns:
        The ``x_train``, ``x_test``, ``y_train`` and ``y_test`` entries of
        the archive, in that order.
    """
    file_name = 'imdb.npz'
    download(f'https://s3.amazonaws.com/text-datasets/{file_name}',
             open_file=False)
    archive = load_npy(Path(get_data_home()) / file_name)
    unk_index = vocab_size - 1

    def clip(word):
        return word if word < unk_index else unk_index

    splits = dict()
    for key, samples in archive.items():
        if 'x' in key:
            # Only the text arrays hold word indices; labels are untouched.
            for row, sentence in enumerate(samples):
                samples[row] = [clip(word) for word in sentence]
        splits[key] = samples
    return (splits['x_train'], splits['x_test'],
            splits['y_train'], splits['y_test'])
def __init__(self, train=True, shuffle=False, rng=None, output_dir=None):
    """Download and extract the STL-10 binary archive, then load one split.

    Args:
        train (bool): Load the files whose name contains ``train_`` when
            True, otherwise those containing ``test_``.
        shuffle (bool): Forwarded to the parent data source.
        rng (numpy.random.RandomState, optional): Random generator; one
            seeded with 313 is created when omitted.
        output_dir (str): Directory the archive is downloaded to and
            extracted in.
            NOTE(review): it is passed to ``os.path.join``, so ``None``
            presumably fails despite being the default -- confirm.
    """
    super(STL10DataSource, self).__init__(shuffle=shuffle, rng=rng)
    self._train = train
    data_uri = 'http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz'
    logger.info('Getting labbeled data from {}'.format(data_uri))
    default_seed = 313
    output_file = get_filename_to_download(output_dir, data_uri)
    r = download(data_uri, output_file=output_file)  # file object returned
    try:
        print(r.name)
        binary_dir = os.path.join(output_dir, "stl10_binary")
        with tarfile.open(fileobj=r, mode="r:gz") as tar:
            tar.extractall(path=output_dir)
        for member in os.listdir(binary_dir):
            if train:
                if 'train_' not in member:
                    continue
                print(member)
                self.load_image_and_labels(os.path.join(binary_dir, member))
            # Validation data
            else:
                print(member)
                if 'test_' not in member:
                    continue
                self.load_image_and_labels(os.path.join(binary_dir, member))
    finally:
        # The original wrote `r.close` without parentheses, which never
        # closed the downloaded file.
        r.close()
    logger.info('Getting labeled data from {}'.format(data_uri))
    self._size = self._labels.size
    self._variables = ('x', 'y')
    if rng is None:
        rng = np.random.RandomState(default_seed)
    self.rng = rng
    self.reset()
def __init__(self, width, height, padding, train=True, shuffle=False, rng=None):
    """Download Caltech101 and keep every image resized in memory.

    Archive members are used when their name contains ``.jpg`` and does not
    contain ``Google``. The parent directory name of each image is its
    class; class ids are assigned in order of first appearance.

    Args:
        width, height, padding: Forwarded to ``self._resize_image``.
        train (bool): Not used by this constructor.
        shuffle (bool): Forwarded to the parent data source.
        rng (numpy.random.RandomState, optional): Random generator; one
            seeded with 313 is created when omitted.
    """
    super(Caltech101DataSource, self).__init__(shuffle=shuffle, rng=rng)
    data_uri = "http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz"
    logger.info('Getting labeled data from {}.'.format(data_uri))
    r = download(data_uri)  # file object returned
    class_ids = dict()
    images, labels = [], []
    with tarfile.open(fileobj=r, mode="r:gz") as archive:
        for member_name in archive.getnames():
            is_jpeg = ".jpg" in member_name
            if not is_jpeg or "Google" in member_name:
                continue
            label, _ = member_name.split("/")[-2:]
            # setdefault evaluates len() before inserting, so a new class
            # receives the next free id.
            class_id = class_ids.setdefault(label, len(class_ids))
            raw_image = imread(archive.extractfile(member_name),
                               num_channels=3)
            images.append(
                self._resize_image(raw_image, width, height, padding))
            labels.append(class_id)
    self._size = len(images)
    self._images = np.array(images)
    self._labels = np.array(labels).reshape(-1, 1)
    r.close()
    logger.info('Getting labeled data from {}.'.format(data_uri))
    self._size = self._labels.size
    self._variables = ('x', 'y')
    self.rng = np.random.RandomState(313) if rng is None else rng
    self._indexes = self.rng.permutation(self._size)
def __init__(self, train=True, shuffle=False, rng=None, label_shuffle=True, label_shuffle_rate=0.1):
    """Download CIFAR-10 and load one split, optionally shuffling train labels.

    Args:
        train (bool): Load the ``data_batch`` members when True, otherwise
            the ``test_batch`` member.
        shuffle (bool): Forwarded to the parent data source.
        rng (numpy.random.RandomState, optional): Random generator; one
            seeded with 313 is created when omitted.
        label_shuffle (bool): For the training split, store
            `label_shuffle_rate` in ``self.shuffle_rate`` and call
            ``self.label_shuffle()``.
        label_shuffle_rate (float): Value stored in ``self.shuffle_rate``.
    """
    super(Cifar10DataSource, self).__init__(shuffle=shuffle, rng=rng)
    self._train = train
    data_uri = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
    logger.info('Getting labeled data from {}.'.format(data_uri))
    r = download(data_uri)  # file object returned
    with tarfile.open(fileobj=r, mode="r:gz") as archive:

        def read_batch(member):
            return np.load(archive.extractfile(member),
                           encoding="bytes", allow_pickle=True)

        if train:
            # Training data
            batches = [read_batch(member)
                       for member in archive.getmembers()
                       if "data_batch" in member.name]
            self._size = 50000
            self._images = np.concatenate(
                [batch[b"data"] for batch in batches]).reshape(
                    self._size, 3, 32, 32)
            self._labels = np.concatenate(
                [batch[b"labels"] for batch in batches]).reshape(-1, 1)
            # Keep the untouched labels to count how many were changed.
            self.raw_label = self._labels.copy()
            if label_shuffle:
                self.shuffle_rate = label_shuffle_rate
                self.label_shuffle()
                print(f"{self.shuffle_rate*100}% of data was shuffled ")
                print(len(np.where(self._labels != self.raw_label)[0]))
        else:
            # Validation data
            for member in archive.getmembers():
                if "test_batch" in member.name:
                    batch = read_batch(member)
                    images = batch[b"data"]
                    labels = batch[b"labels"]
            self._size = 10000
            self._images = images.reshape(self._size, 3, 32, 32)
            self._labels = np.array(labels).reshape(-1, 1)
    r.close()
    logger.info('Getting labeled data from {}.'.format(data_uri))
    self._size = self._labels.size
    self._variables = ('x', 'y')
    self.rng = np.random.RandomState(313) if rng is None else rng
    self.reset()
def main():
    """Play an Atari game greedily with a pretrained Q-network, forever.

    When ``args.nnp`` is not given, ``asset/<gym_env>/qnet.nnp`` is used and
    downloaded first if ``find_local_nnp`` does not find it.
    """
    args = get_args()

    nn.set_default_context(
        get_extension_context(args.extension, device_id=args.device_id))

    if args.nnp is None:
        local_nnp_dir = os.path.join("asset", args.gym_env)
        local_nnp_file = os.path.join(local_nnp_dir, "qnet.nnp")

        if not find_local_nnp(args.gym_env):
            logger.info("Downloading nnp data since you didn't specify...")
            # URLs always use '/'; os.path.join would insert '\\' on Windows.
            nnp_uri = "/".join([
                "https://nnabla.org/pretrained-models/nnp_models/examples/dqn",
                args.gym_env, "qnet.nnp"
            ])
            # makedirs also creates the parent "asset" directory, which a
            # plain os.mkdir does not.
            os.makedirs(local_nnp_dir, exist_ok=True)
            download(nnp_uri, output_file=local_nnp_file, open_file=False)
            logger.info("Download done!")

        args.nnp = local_nnp_file

    from atari_utils import make_atari_deepmind
    env = make_atari_deepmind(args.gym_env, valid=False)
    print('Observation:', env.observation_space)
    print('Action:', env.action_space)
    obs_sampler = ObsSampler(args.num_frames)
    val_replay_memory = ReplayMemory(env.observation_space.shape,
                                     env.action_space.shape,
                                     max_memory=args.num_frames)
    # just play greedily
    explorer = GreedyExplorer(env.action_space.n,
                              use_nnp=True,
                              nnp_file=args.nnp,
                              name='qnet')
    validator = Validator(env,
                          val_replay_memory,
                          explorer,
                          obs_sampler,
                          num_episodes=1,
                          render=not args.no_render)
    while True:
        validator.step()
def __init__(self, train=True, shuffle=False, rng=None, output_dir=None):
    """Download one MNIST split and keep images and labels in memory.

    Args:
        train (bool): Use the ``train-*`` files when true, else ``t10k-*``.
        shuffle (bool): Forwarded to the parent data source.
        rng (numpy.random.RandomState, optional): Random generator; one
            seeded with 313 is created when omitted.
        output_dir (str, optional): Passed to ``get_filename_to_download``
            to decide where the downloaded files are stored.
    """
    super(MnistDataSource, self).__init__(shuffle=shuffle)
    self._train = train
    split = 'train' if self._train else 't10k'
    image_uri = 'http://yann.lecun.com/exdb/mnist/{}-images-idx3-ubyte.gz'.format(split)
    label_uri = 'http://yann.lecun.com/exdb/mnist/{}-labels-idx1-ubyte.gz'.format(split)

    # The gzip payload is inflated in memory with zlib instead of gzip.open;
    # wbits = MAX_WBITS | 32 lets zlib detect the gzip header by itself.
    wbits = zlib.MAX_WBITS | 32

    logger.info('Getting label data from {}.'.format(label_uri))
    label_file = download(
        label_uri,
        output_file=get_filename_to_download(output_dir, label_uri))
    payload = zlib.decompress(label_file.read(), wbits)
    # 8-byte big-endian header precedes the label bytes.
    _, size = struct.unpack('>II', payload[0:8])
    self._labels = numpy.frombuffer(payload[8:], numpy.uint8).reshape(-1, 1)
    label_file.close()
    logger.info('Getting label data done.')

    logger.info('Getting image data from {}.'.format(image_uri))
    image_file = download(
        image_uri,
        output_file=get_filename_to_download(output_dir, image_uri))
    payload = zlib.decompress(image_file.read(), wbits)
    # 16-byte big-endian header; its last three fields give the array shape.
    _, size, height, width = struct.unpack('>IIII', payload[0:16])
    self._images = numpy.frombuffer(payload[16:], numpy.uint8).reshape(
        size, 1, height, width)
    image_file.close()
    logger.info('Getting image data done.')

    self._size = self._labels.size
    self._variables = ('x', 'y')
    self.rng = numpy.random.RandomState(313) if rng is None else rng
    self.reset()
def load_mnist(train=True):
    '''
    Download (or read from the cache) the MNIST images and labels.

    Args:
        train (bool): Return the ``train-*`` files when true, otherwise the
            ``t10k-*`` files.

    Returns:
        numpy.ndarray: uint8 images of shape (#images, 1, height, width),
            with the sizes read from the file header.
        numpy.ndarray: uint8 labels of shape (#images, 1).
    '''
    base_uri = 'http://yann.lecun.com/exdb/mnist/'
    split = 'train' if train else 't10k'
    image_uri = base_uri + split + '-images-idx3-ubyte.gz'
    label_uri = base_uri + split + '-labels-idx1-ubyte.gz'

    # gzip data is inflated in memory via zlib; MAX_WBITS | 32 enables
    # automatic gzip header detection.
    gzip_wbits = zlib.MAX_WBITS | 32

    logger.info('Getting label data from {}.'.format(label_uri))
    label_file = download(label_uri)
    label_bytes = zlib.decompress(label_file.read(), gzip_wbits)
    _, size = struct.unpack('>II', label_bytes[0:8])
    labels = numpy.frombuffer(label_bytes[8:], numpy.uint8).reshape(-1, 1)
    label_file.close()
    logger.info('Getting label data done.')

    logger.info('Getting image data from {}.'.format(image_uri))
    image_file = download(image_uri)
    image_bytes = zlib.decompress(image_file.read(), gzip_wbits)
    _, size, height, width = struct.unpack('>IIII', image_bytes[0:16])
    images = numpy.frombuffer(image_bytes[16:], numpy.uint8).reshape(
        size, 1, height, width)
    image_file.close()
    logger.info('Getting image data done.')

    return images, labels
def main():
    """Download the uncased BERT-Base checkpoint archive and convert it.

    The zip archive is stored and extracted in the current directory, then
    ``convert`` turns ``bert_model.ckpt`` into ``nbla_bert_params.h5``.
    """
    archive_name = 'uncased_L-12_H-768_A-12.zip'
    # Only the file on disk is needed. The original kept the open file
    # object returned by download() in an unused variable and never closed it.
    download(
        "https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip",
        archive_name,
        open_file=False)
    with zipfile.ZipFile(archive_name, "r") as zip_ref:
        zip_ref.extractall(".")
    input_ckpt_file = 'uncased_L-12_H-768_A-12/bert_model.ckpt'
    output_nnabla_file = 'nbla_bert_params.h5'
    convert(input_ckpt_file, output_nnabla_file)
def __init__(self, train=True, shuffle=False, rng=None):
    """Download one MNIST split and keep images and labels in memory.

    Args:
        train (bool): Use the ``train-*`` files when true, else ``t10k-*``.
        shuffle (bool): Forwarded to the parent data source.
        rng (numpy.random.RandomState, optional): Random generator; one
            seeded with 313 is created when omitted.
    """
    super(MnistDataSource, self).__init__(shuffle=shuffle)
    self._train = train
    if self._train:
        file_prefix = 'http://yann.lecun.com/exdb/mnist/train'
    else:
        file_prefix = 'http://yann.lecun.com/exdb/mnist/t10k'
    image_uri = file_prefix + '-images-idx3-ubyte.gz'
    label_uri = file_prefix + '-labels-idx1-ubyte.gz'

    # Labels: the downloaded gzip stream is inflated in memory by zlib
    # (MAX_WBITS | 32 turns on automatic header detection).
    logger.info('Getting label data from {}.'.format(label_uri))
    stream = download(label_uri)
    inflated = zlib.decompress(stream.read(), zlib.MAX_WBITS | 32)
    header, body = inflated[0:8], inflated[8:]
    _, size = struct.unpack('>II', header)
    self._labels = numpy.frombuffer(body, numpy.uint8).reshape(-1, 1)
    stream.close()
    logger.info('Getting label data done.')

    # Images: the 16-byte header carries count, height and width.
    logger.info('Getting image data from {}.'.format(image_uri))
    stream = download(image_uri)
    inflated = zlib.decompress(stream.read(), zlib.MAX_WBITS | 32)
    header, body = inflated[0:16], inflated[16:]
    _, size, height, width = struct.unpack('>IIII', header)
    self._images = numpy.frombuffer(body, numpy.uint8).reshape(
        size, 1, height, width)
    stream.close()
    logger.info('Getting image data done.')

    self._size = self._labels.size
    self._variables = ('x', 'y')
    if rng is None:
        rng = numpy.random.RandomState(313)
    self.rng = rng
    self.reset()
def load_cyclegan_dataset(dataset="horse2zebra", train=True, domain="A", normalize_method=lambda x: (x - 127.5) / 127.5):
    '''
    Load one domain of a CycleGAN dataset from
    `here <https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/>`_

    The archive is expected to hold two domains; images are taken from the
    entries whose name contains ``train<domain>`` or ``test<domain>`` and
    ``.jpg``.

    Args:
        dataset (str): Dataset name without the ".zip" extension.
        train (bool): Select the ``train`` entries when true, otherwise the
            ``test`` entries.
        domain (str): Domain name. It must be "A" or "B".
        normalize_method: Function applied to each channel-first image.

    Returns:
        (np.ndarray, list): Images and file names without extension.
    '''
    assert domain in ["A", "B"]
    image_uri = 'https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/{}.zip'.format(
        dataset)
    logger.info('Getting {} data from {}.'.format(dataset, image_uri))
    r = download(image_uri)

    images, filename_list = [], []
    dirname = "{}{}".format("train" if train else "test", domain)
    # Load unpaired images from zipfile.
    with zipfile.ZipFile(r, "r") as zf:
        for zipinfo in zf.infolist():
            filename = zipinfo.filename
            # keep only the jpg entries of the requested split and domain
            if dirname not in filename or ".jpg" not in filename:
                continue
            with zf.open(filename, "r") as fp:
                logger.info('loading {}'.format(filename))
                # load image as HWC and move channels first
                pixels = scipy.misc.imread(fp, mode="RGB")
                pixels = np.transpose(pixels, (2, 0, 1))
                images.append(normalize_method(pixels))
                stem = os.path.splitext(filename.split("/")[-1])[0]
                filename_list.append(stem)
    r.close()
    logger.info('Getting image data done.')
    return np.asarray(images), filename_list
def download_tiny_imagenet():
    """Download and unzip Tiny ImageNet into the nnabla data home.

    Nothing is downloaded when the ``tiny-imagenet-200`` directory already
    exists there.

    Returns:
        str: Path of the ``tiny-imagenet-200`` directory.
    """
    url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
    dir_data = os.path.join(get_data_home(), 'tiny-imagenet-200')
    if os.path.isdir(dir_data):
        return dir_data
    archive_file = download(url)
    logger.info('Extracting {} ...'.format(archive_file.name))
    archive = zipfile.ZipFile(archive_file)
    extract_root = get_data_home()
    # Extract member by member so that tqdm can show the progress.
    for member in tqdm(archive.namelist()):
        archive.extract(member, extract_root)
    archive.close()
    archive_file.close()
    return dir_data
def __init__(self, train=True, shuffle=False, rng=None):
    """Download CIFAR-100 and load one split with its fine labels.

    Args:
        train (bool): Load the archive member whose name contains ``train``
            when true, otherwise the one containing ``test``.
        shuffle (bool): Forwarded to the parent data source.
        rng (numpy.random.RandomState, optional): Random generator; one
            seeded with 313 is created when omitted.
    """
    super(Cifar100DataSource, self).__init__(shuffle=shuffle, rng=rng)
    self._train = train
    data_uri = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
    logger.info('Getting labeled data from {}.'.format(data_uri))
    r = download(data_uri)  # file object returned
    # Both splits are read the same way; only the member name and the
    # number of samples differ.
    split_key, split_size = ("train", 50000) if train else ("test", 10000)
    with tarfile.open(fileobj=r, mode="r:gz") as archive:
        for member in archive.getmembers():
            if split_key in member.name:
                batch = np.load(archive.extractfile(member),
                                encoding="bytes", allow_pickle=True)
                images = batch[b"data"]
                labels = batch[b"fine_labels"]
        self._size = split_size
        self._images = images.reshape(self._size, 3, 32, 32)
        self._labels = np.array(labels).reshape(-1, 1)
    r.close()
    logger.info('Getting labeled data from {} done.'.format(data_uri))
    self._size = self._labels.size
    self._variables = ('x', 'y')
    self.rng = np.random.RandomState(313) if rng is None else rng
    self.reset()
def load_omniglot(test=False):
    """Download an Omniglot archive and group its images by letter.

    Args:
        test (bool): Load ``images_evaluation.zip`` when True, otherwise
            ``images_background.zip``.

    Returns:
        x (np.ndarray): Images stacked per letter, 20 images each.
        y (np.ndarray): Letter index of every image, one row of 20 per letter.
        lang_dict (dict): Maps an alphabet name to
            ``[first letter index, last letter index]``.
    """
    fname = "images_evaluation.zip" if test else "images_background.zip"
    dlname = OMNIGLOT_URL + fname
    r = download(dlname)
    x = []
    y = []
    imgs = []
    lang_dict = {}
    n_letter = 0
    try:
        # Both handles are now closed; the original left them open.
        with zipfile.ZipFile(r, mode="r") as f:
            for path in f.namelist():
                # Four types of "path" is possible
                # "image_xxx/"
                # "image_xxx/Alphabet"
                # "image_xxx/Alphabet/Letter"
                # "image_xxx/Alphabet/Letter/img.png"
                names = path.split('/')
                if len(names) == 3:  # i.e. [images_xxx, Alphabet, None]
                    alphabet = names[1]
                    print("loading alphabet: " + alphabet)
                    lang_dict[alphabet] = [n_letter, None]
                # `!=` compares values; the original `is not ''` compared
                # object identity with a literal, which only works by
                # accident of string interning.
                if len(names) == 4 and names[3] != '':
                    # i.e. [images_xxx, Alphabet, Letter, Image]
                    imgs.append(imageio.imread(f.read(path)))
                    if len(imgs) == 20:  # Number of images are limited to 20
                        x.append(np.stack(imgs))
                        y.append(np.ones(20, ) * n_letter)
                        n_letter += 1
                        lang_dict[alphabet][1] = n_letter - 1
                        imgs = []
    finally:
        r.close()
    x = np.stack(x)
    y = np.stack(y)
    return x, y, lang_dict
def download_tiny_imagenet():
    """
    Fetch and extract Tiny ImageNet under ``datasets/``.

    The download is skipped when ``datasets/tiny-imagenet-200`` already
    exists.

    Returns:
        dir_data: the directory of the data containing the tiny imagenet
    """
    url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
    dir_data = os.path.join('datasets', 'tiny-imagenet-200')
    if os.path.isdir(dir_data):
        return dir_data
    archive_file = download(url)
    logger.info('Extracting {} ...'.format(archive_file.name))
    archive = zipfile.ZipFile(archive_file)
    extract_root = 'datasets/'
    # One extract call per member keeps the tqdm progress bar meaningful.
    for member in tqdm(archive.namelist()):
        archive.extract(member, extract_root)
    archive.close()
    archive_file.close()
    return dir_data
def _load_data(url: str):
    """Download a text corpus and split it into sentences of word indices.

    Newlines are replaced by an ``<eos>`` token. New words are registered in
    the surrounding ``w2i`` / ``i2w`` dictionaries, which are updated in
    place.

    Args:
        url (str): Location of the UTF-8 text file.

    Returns:
        list: One list of word indices per sentence, without the ``<eos>``
            index. Words after the last ``<eos>`` are not returned.
    """
    with download(url, open_file=True) as f:
        text = f.read().decode('utf-8').replace('\n', ' <eos> ')
    tokens = text.strip().split()

    encoded = np.ndarray((len(tokens), ), dtype=np.int32)
    for position, token in enumerate(tokens):
        if token not in w2i:
            w2i[token] = len(w2i)
        token_id = w2i[token]
        if token_id not in i2w:
            i2w[token_id] = token
        encoded[position] = token_id

    sentences = []
    current = []
    for index in encoded:
        if i2w[index] == '<eos>':
            # sentence boundary: store what was collected so far
            sentences.append(current)
            current = []
        else:
            current.append(index)
    return sentences
def load_pix2pix_dataset(dataset="edges2shoes", train=True, num_samples=-1):
    """Download a pix2pix dataset and split each image into its two halves.

    Every selected jpg holds two pictures side by side; the left half goes
    to A and the right half to B, both channel first.

    Args:
        dataset (str): Dataset name without the ".tar.gz" extension.
        train (bool): ``True`` selects members whose name contains "train",
            ``False`` those containing "val".
        num_samples (int): Stop after this many images; -1 loads all.

    Returns:
        (np.ndarray, np.ndarray, list): A halves, B halves and file names.
    """
    image_uri = 'https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/{}.tar.gz'\
        .format(dataset)
    logger.info('Getting {} data from {}.'.format(dataset, image_uri))
    r = download(image_uri)

    # Load concatenated images, then save separately
    # TODO: how to do for test
    img_A_list, img_B_list, img_names = [], [], []
    with tarfile.open(fileobj=r, mode="r") as tar:
        for tinfo in tar.getmembers():
            member_name = tinfo.name
            if ".jpg" not in member_name:
                continue
            wanted_train = train == True and "train" in member_name
            wanted_val = train == False and "val" in member_name
            if not (wanted_train or wanted_val):
                continue

            logger.info("Loading {} ...".format(member_name))
            img = scipy.misc.imread(tar.extractfile(tinfo), mode="RGB")
            half = img.shape[1] // 2
            img_A_list.append(img[:, 0:half, :].transpose((2, 0, 1)))
            img_B_list.append(img[:, half:, :].transpose((2, 0, 1)))
            img_names.append(member_name.split("/")[-1])

            # one name is stored per loaded image, so its length is the count
            if num_samples != -1 and len(img_names) >= num_samples:
                break

    r.close()
    logger.info('Getting image data done.')
    return np.asarray(img_A_list), np.asarray(img_B_list), img_names
def load_ptbset(ptbfile):
    """
    Load Penn Treebank Corpus

    Args:
        ptbfile: Location passed to ``download``.

    Returns:
        itow (dict): index to word.
        wtoi (dict): word to index, numbered by order of first appearance.
        dataset (list): The corpus as a flat list of word indices.
    """
    itow = {}  # index to word
    wtoi = {}  # word to index
    dataset = []
    f = download(ptbfile)
    try:
        # extract vocabraries from corpus
        for line in f:
            for w in line.split():
                # register the new word as an index number of first appearance
                if w not in wtoi:
                    i = len(wtoi)
                    wtoi[w] = i
                    itow[i] = w
                # translate words into numbers
                dataset.append(wtoi[w])
    finally:
        # The original never closed the downloaded file.
        f.close()
    return itow, wtoi, dataset
def _load_data(self, type_name: str) -> List[List[int]]:
    """Download one corpus file and convert it to sentences of word indices.

    Newlines are replaced by ``<eos>``. ``self.w2i`` / ``self.i2w`` are
    extended with unseen words, and ``self.c2i`` / ``self.i2c`` with unseen
    characters when ``self.return_char_info`` is set.

    Args:
        type_name (str): Value formatted into ``self.ptb_url``.

    Returns:
        Sentences as lists of word indices. Each one ends with the
        ``<eos>`` index and starts with the ``<bos>`` index when
        ``self.with_bos`` is set.
    """
    with download(self.ptb_url.format(type_name), open_file=True) as f:
        lines: str = f.read().decode('utf-8').replace('\n', '<eos>')

    if self.return_char_info:
        for char in set(lines):
            if char not in self.c2i:
                self.c2i[char] = len(self.c2i)
            char_id = self.c2i[char]
            if char_id not in self.i2c:
                self.i2c[char_id] = char

    tokens = lines.strip().split()
    encoded = np.ndarray((len(tokens), ), dtype=np.int32)
    for position, token in enumerate(tokens):
        if token not in self.w2i:
            self.w2i[token] = len(self.w2i)
        token_id = self.w2i[token]
        if token_id not in self.i2w:
            self.i2w[token_id] = token
        encoded[position] = token_id

    def start_sentence():
        # every sentence optionally opens with the <bos> index
        return [self.w2i['<bos>']] if self.with_bos else []

    sentences = []
    current = start_sentence()
    for index in encoded:
        current.append(index)
        if self.i2w[index] == '<eos>':
            sentences.append(current)
            current = start_sentence()
    return sentences
def main():
    """Generate images with the pretrained StyleGAN2 generator and save them.

    The generator weights ``styleGAN2_G_params.h5`` are downloaded into the
    current directory when missing. One PNG per batch item is written to
    ``--output-dir``; the file name is built from the seed(s) unless
    ``--output-filename`` is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--output-filename', '-o', type=str, default=None,
                        help="name of an output image file.")
    parser.add_argument('--output-dir', '-d', type=str, default="results",
                        help="directory where the generated image is saved.")
    parser.add_argument('--seed', type=int, required=True,
                        help="seed for primal style noise.")
    parser.add_argument('--stochastic-seed', type=int, default=1,
                        help="seed for noises added to intermediate features.")
    parser.add_argument('--truncation-psi', default=0.5, type=float,
                        help="value for truncation trick.")
    parser.add_argument('--batch-size', type=int, default=1,
                        help="Number of images to generate.")
    parser.add_argument(
        '--mixing', action='store_true',
        help="if specified, apply style mixing with additional seed.")
    parser.add_argument('--seed-mix', type=int, default=None,
                        help="seed for another / secondary style noise.")
    parser.add_argument('--mix-after', type=int, default=7,
                        help="after this layer, style mixing is applied.")
    parser.add_argument('--context', '-c', type=str, default="cudnn",
                        help="context. cudnn is recommended.")
    args = parser.parse_args()

    assert 0 < args.mix_after < 17, "specify --mix-after from 1 to 16."

    if not os.path.isfile("styleGAN2_G_params.h5"):
        print("Downloading the pretrained weight. Please wait...")
        url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
        from nnabla.utils.data_source_loader import download
        download(url, url.split('/')[-1], False)

    ctx = get_extension_context(args.context)
    nn.set_default_context(ctx)

    batch_size = args.batch_size
    num_layers = 18

    rnd = np.random.RandomState(args.seed)
    z = rnd.randn(batch_size, 512)

    print("Generation started...")
    print(f"truncation value: {args.truncation_psi}")
    print(f"seed for additional noise: {args.stochastic_seed}")

    # Inference via nn.NdArray utilizes significantly less memory

    if args.mixing:
        # apply style mixing
        # `is not None` rather than truthiness: 0 is a valid seed, but the
        # original `assert args.seed_mix` rejected it.
        assert args.seed_mix is not None, \
            "specify --seed-mix when --mixing is used."
        print(
            f"using style noise seed {args.seed} for layers 0-{args.mix_after - 1}"
        )
        print(
            f"using style noise seed {args.seed_mix} for layers {args.mix_after}-{num_layers}."
        )
        rnd = np.random.RandomState(args.seed_mix)
        z2 = rnd.randn(batch_size, 512)
        style_noises = [nn.NdArray.from_numpy_array(z)]
        style_noises += [nn.NdArray.from_numpy_array(z2)]
    else:
        # no style mixing (single noise / style is used)
        print(f"using style noise seed {args.seed} for entire layers.")
        style_noises = [nn.NdArray.from_numpy_array(z) for _ in range(2)]

    nn.set_auto_forward(True)
    nn.load_parameters("styleGAN2_G_params.h5")
    rgb_output = generate(batch_size, style_noises, args.stochastic_seed,
                          args.mix_after, args.truncation_psi)

    # convert to uint8 to save an image file
    image = convert_images_to_uint8(rgb_output, drange=[-1, 1])
    if args.output_filename is None:
        if not args.mixing:
            filename = f"seed{args.seed}"
        else:
            filename = f"seed{args.seed}_{args.seed_mix}"
    else:
        filename = args.output_filename

    os.makedirs(args.output_dir, exist_ok=True)

    for i in range(batch_size):
        filepath = os.path.join(args.output_dir, f'{filename}_{i}.png')
        imsave(filepath, image[i], channel_first=True)
        print(f"Genetation completed. Saved {filepath}.")
# Script fragment (the enclosing definition is not fully visible here):
# sets the default nnabla context, declares the hyper-parameters
# (embedding_size, batch_size, max_epoch, negative_sample_size), downloads the
# tab-separated `mammal_subtree.tsv` file and builds
#   pdata - list of the non-empty lines, each split on tabs,
#   pdict - mapping from every distinct word in pdata to an integer index.
# `load_train_func(index)` unpacks pair number `index` and builds a sampling
# weight vector that is zero for the pair's two words and 1 / (len(pdict) - 2)
# for every other word.
# NOTE(review): `load_train_func` appears to be cut off after the weight
#   normalisation in this view -- confirm the remainder against the full file.
# NOTE(review): `lines` is annotated as `str` but holds the list returned by
#   `.split('\n')`.
nn.set_default_context(ctx) """ """ embedding_size: int = 2 batch_size: int = 1 max_epoch: int = 50 negative_sample_size = 10 file_url = 'https://raw.githubusercontent.com/qiangsiwei/poincare_embedding/master/data/mammal_subtree.tsv' from functools import reduce import operator import random with download(file_url, open_file=True) as f: lines: str = f.read().decode('utf-8').split('\n') pdata = list(map(lambda l: l.split('\t'), filter(None, lines))) pdict = {w: i for i, w in enumerate(set(reduce(operator.add, pdata)))} vocab_size: int = len(pdict) num_train_batch = len(pdata) // batch_size def load_train_func(index): x, y = pdata[index] negative_sample_prob = np.ones(len(pdict)) negative_sample_prob[pdict[x]] = 0.0 negative_sample_prob[pdict[y]] = 0.0 negative_sample_prob /= len(pdict) - 2
def main():
    """Detect faces in ``args.test_image`` and predict 68 landmarks per face.

    Faces are found with dlib (the CNN detector for the ``cudnn`` context,
    the frontal face detector otherwise). Each face is cropped and passed
    through FAN; with ``args.landmarks_type_3D`` a depth value per landmark
    is added by ResNetDepth. The landmarks are handed to ``visualize``.

    Returns:
        None. The function returns early when no face is detected.
    """
    args = get_args()

    # Get context
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)
    nn.set_auto_forward(True)

    image = io.imread(args.test_image)
    if image.ndim == 2:
        image = color.gray2rgb(image)
    elif image.shape[-1] == 4:
        image = image[..., :3]

    if args.context == 'cudnn':
        if not os.path.isfile(args.cnn_face_detction_model):
            # Download the CNN based face-detection model file provided by
            # dlib and save it in the directory where this script is executed.
            print("Downloading the face detection CNN. Please wait...")
            url = "http://dlib.net/files/mmod_human_face_detector.dat.bz2"
            from nnabla.utils.data_source_loader import download
            archive_name = url.split('/')[-1]
            download(url, archive_name, False)
            # Both files are handled by context managers; the original left
            # the BZ2File and the written .dat file unclosed.
            # get the decompressed data.
            with bz2.BZ2File(archive_name) as compressed:
                data = compressed.read()
            # write to dat file (archive name without the ".bz2" suffix).
            with open(archive_name[:-4], 'wb') as dat_file:
                dat_file.write(data)
        face_detector = dlib.cnn_face_detection_model_v1(
            args.cnn_face_detction_model)
        detected_faces = face_detector(
            cv2.cvtColor(image[..., ::-1].copy(), cv2.COLOR_BGR2GRAY))
        detected_faces = [[
            d.rect.left(),
            d.rect.top(),
            d.rect.right(),
            d.rect.bottom()
        ] for d in detected_faces]
    else:
        face_detector = dlib.get_frontal_face_detector()
        detected_faces = face_detector(
            cv2.cvtColor(image[..., ::-1].copy(), cv2.COLOR_BGR2GRAY))
        detected_faces = [[d.left(), d.top(), d.right(), d.bottom()]
                          for d in detected_faces]

    if len(detected_faces) == 0:
        print("Warning: No faces were detected.")
        return None

    # Load FAN weights
    with nn.parameter_scope("FAN"):
        print("Loading FAN weights...")
        nn.load_parameters(args.model)

    # Load ResNetDepth weights
    if args.landmarks_type_3D:
        with nn.parameter_scope("ResNetDepth"):
            print("Loading ResNetDepth weights...")
            nn.load_parameters(args.resnet_depth_model)

    landmarks = []
    for d in detected_faces:
        # d = [left, top, right, bottom]
        center = [d[2] - (d[2] - d[0]) / 2.0, d[3] - (d[3] - d[1]) / 2.0]
        center[1] = center[1] - (d[3] - d[1]) * 0.12
        scale = (d[2] - d[0] + d[3] - d[1]) / args.reference_scale
        inp = crop(image, center, scale)
        inp = nn.Variable.from_numpy_array(inp.transpose((2, 0, 1)))
        inp = F.reshape(F.mul_scalar(inp, 1 / 255.0), (1, ) + inp.shape)
        with nn.parameter_scope("FAN"):
            out = fan(inp, args.network_size)[-1]
        pts, pts_img = get_preds_fromhm(out, center, scale)
        pts, pts_img = F.reshape(pts, (68, 2)) * \
            4, F.reshape(pts_img, (68, 2))

        if args.landmarks_type_3D:
            heatmaps = np.zeros((68, 256, 256), dtype=np.float32)
            # separate index name: the original reused the face loop's `i`
            for k in range(68):
                if pts.d[k, 0] > 0:
                    heatmaps[k] = draw_gaussian(heatmaps[k], pts.d[k], 2)
            heatmaps = nn.Variable.from_numpy_array(heatmaps)
            heatmaps = F.reshape(heatmaps, (1, ) + heatmaps.shape)
            with nn.parameter_scope("ResNetDepth"):
                depth_pred = F.reshape(
                    resnet_depth(F.concatenate(inp, heatmaps, axis=1)),
                    (68, 1))
            pts_img = F.concatenate(pts_img,
                                    depth_pred * (1.0 / (256.0 /
                                                         (200.0 * scale))),
                                    axis=1)

        landmarks.append(pts_img.d)
    visualize(landmarks, image, args.output)
def __init__(self, train=True, shuffle=False, rng=None):
    """Download CIFAR-100 under a file lock and load one split.

    A lock file in the nnabla data home serialises the download between
    processes (e.g. spawned by mpirun). Waiting for the lock is given up
    after 30 minutes.

    Args:
        train (bool): Load the archive member whose name contains ``train``
            when True, otherwise the one containing ``test``.
        shuffle (bool): Forwarded to the parent data source.
        rng (numpy.random.RandomState, optional): Random generator; one
            seeded with 313 is created when omitted.

    Raises:
        Exception: The lock could not be acquired within 30 minutes.
        OSError: Creating the lock file failed for a reason other than the
            file already existing.
    """
    super(Cifar100DataSource, self).__init__(shuffle=shuffle)

    # Lock
    lockfile = os.path.join(get_data_home(), "cifar100.lock")
    start_time = time.time()
    while True:  # busy-lock due to communication between process spawn by mpirun
        try:
            # O_EXCL makes creation fail while another process holds the lock
            fd = os.open(lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR)
            break
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        if (time.time() - start_time) >= 60 * 30:  # wait for 30min
            # The message named cifar10.lock although this class creates
            # cifar100.lock.
            raise Exception(
                "Timeout occured. If there are cifar100.lock in $HOME/nnabla_data, it should be deleted."
            )
        time.sleep(5)

    try:
        self._train = train
        data_uri = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
        logger.info('Getting labeled data from {}.'.format(data_uri))
        r = download(data_uri)  # file object returned
        try:
            with tarfile.open(fileobj=r, mode="r:gz") as fpin:
                # Training data
                if train:
                    for member in fpin.getmembers():
                        if "train" not in member.name:
                            continue
                        fp = fpin.extractfile(member)
                        # The batches are pickles; numpy refuses them
                        # unless allow_pickle=True is given.
                        data = np.load(fp, encoding="bytes",
                                       allow_pickle=True)
                        images = data[b"data"]
                        labels = data[b"fine_labels"]
                    self._size = 50000
                    self._images = images.reshape(self._size, 3, 32, 32)
                    self._labels = np.array(labels).reshape(-1, 1)
                # Validation data
                else:
                    for member in fpin.getmembers():
                        if "test" not in member.name:
                            continue
                        fp = fpin.extractfile(member)
                        data = np.load(fp, encoding="bytes",
                                       allow_pickle=True)
                        images = data[b"data"]
                        labels = data[b"fine_labels"]
                    self._size = 10000
                    self._images = images.reshape(self._size, 3, 32, 32)
                    self._labels = np.array(labels).reshape(-1, 1)
        finally:
            r.close()
        logger.info('Getting labeled data from {} done.'.format(data_uri))

        self._size = self._labels.size
        self._variables = ('x', 'y')
        if rng is None:
            rng = np.random.RandomState(313)
        self.rng = rng
        self.reset()
    finally:
        # Unlock even when loading fails, so that other processes are not
        # blocked by a stale lock file.
        os.close(fd)
        os.unlink(lockfile)
def generate_attribute_direction(args, attribute_prediction_model):
    """Estimate a latent-space direction for an attribute and save it.

    Generator weights are loaded from ``args.weights_path`` (downloaded
    first when ``gen_params.h5`` is missing) and classifier weights from
    ``args.classifier_weight_path``. ``args.num_images`` latent codes are
    then sampled in batches of ``args.batch_size``, images are synthesised
    and scored by ``attribute_prediction_model``. Latents whose prediction
    has a softmax confidence above 0.65 are summed per predicted class
    (0 and 1); the saved direction is the difference of the two per-class
    means.

    Args:
        args: Parsed arguments; reads ``weights_path``,
            ``classifier_weight_path``, ``num_images`` and ``batch_size``.
        attribute_prediction_model: Callable invoked as
            ``attribute_prediction_model(gen, True)``; its output is passed
            through a softmax.
    """
    gen_params_path = os.path.join(args.weights_path, 'gen_params.h5')
    if not os.path.isfile(gen_params_path):
        os.makedirs(args.weights_path, exist_ok=True)
        print(
            "Downloading the pretrained tf-converted weights. Please wait...")
        url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
        from nnabla.utils.data_source_loader import download
        download(url, gen_params_path, False)
    nn.load_parameters(gen_params_path)
    print('Loaded pretrained weights from tensorflow!')

    nn.load_parameters(args.classifier_weight_path)
    print(f'Loaded {args.classifier_weight_path}')

    # Full-size batches followed by one smaller batch for the remainder.
    num_full, leftover = divmod(args.num_images, args.batch_size)
    batches = [args.batch_size] * num_full
    if leftover != 0:
        batches.append(leftover)

    w_plus, w_minus = 0.0, 0.0
    w_plus_count, w_minus_count = 0.0, 0.0
    pbar = trange(len(batches))
    for step in pbar:
        batch_size = batches[step]

        # The same noise sample is used for both latent inputs.
        noise = F.randn(shape=(batch_size, 512)).data
        z = [
            F.div2(
                code,
                F.pow_scalar(F.add_scalar(
                    F.mean(code**2., axis=1, keepdims=True), 1e-8),
                    0.5, inplace=True))
            for code in (noise, noise)
        ]

        # get latent code
        w = [mapping_network(code, outmaps=512, num_layers=8) for code in z]

        # truncation trick
        dlatent_avg = nn.parameter.get_parameter_or_create(name="dlatent_avg",
                                                           shape=(1, 512))
        w = [lerp(dlatent_avg, latent, 0.7) for latent in w]

        constant_bc = nn.parameter.get_parameter_or_create(
            name="G_synthesis/4x4/Const/const", shape=(1, 512, 4, 4))
        constant_bc = F.broadcast(constant_bc,
                                  (batch_size, ) + constant_bc.shape[1:])

        gen = synthesis(w, constant_bc, noise_seed=100, mix_after=7)

        classifier_score = F.softmax(attribute_prediction_model(gen, True))
        confidence, class_pred = F.max(classifier_score,
                                       axis=1,
                                       with_index=True,
                                       keepdims=True)

        # Masks selecting confident predictions of each class.
        is_class0 = class_pred.data == 0
        is_class1 = class_pred.data == 1
        is_confident = confidence.data > 0.65

        w_plus += np.sum(w[0].data * is_class0 * is_confident,
                         axis=0,
                         keepdims=True)
        w_minus += np.sum(w[0].data * is_class1 * is_confident,
                          axis=0,
                          keepdims=True)
        w_plus_count += np.sum(is_class0 * is_confident)
        w_minus_count += np.sum(is_class1 * is_confident)

        pbar.set_description(f'{w_plus_count} {w_minus_count}')

    # save attribute direction
    mean_plus = w_plus / w_plus_count
    mean_minus = w_minus / w_minus_count
    attribute_variation_direction = mean_plus - mean_minus
    print(w_plus_count, w_minus_count)
    np.save(f'{args.classifier_weight_path.split("/")[0]}/direction.npy',
            attribute_variation_direction)
def generate_data(args):
    """Generate image triplets with the pretrained StyleGAN2 generator.

    For every sample three images are written to ``args.save_image_path``:
    ``image_<n>.png`` (from the unmodified latents), ``image_<n>_o.png``
    and ``image_<n>_y.png``. When ``args.face_morph`` is false the latter
    two come from shifting the first latent by ``+/- args.coeff`` times the
    direction loaded from ``args.attr_delta_path``; otherwise they are
    synthesised from the first latent only and the second latent only.

    ``<n>`` is the global sample index, so a final partial batch continues
    the numbering instead of overwriting files of an earlier batch.

    Args:
        args: Parsed arguments; reads ``weights_path``, ``save_image_path``,
            ``num_images``, ``batch_size``, ``face_morph``,
            ``attr_delta_path`` and ``coeff``.
    """
    gen_params_path = os.path.join(args.weights_path, 'gen_params.h5')
    if not os.path.isfile(gen_params_path):
        os.makedirs(args.weights_path, exist_ok=True)
        print(
            "Downloading the pretrained tf-converted weights. Please wait...")
        url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
        from nnabla.utils.data_source_loader import download
        download(url, gen_params_path, False)
    nn.load_parameters(gen_params_path)
    print('Loaded pretrained weights from tensorflow!')

    os.makedirs(args.save_image_path, exist_ok=True)

    # Full-size batches followed by one smaller batch for the remainder.
    batches = [
        args.batch_size for _ in range(args.num_images // args.batch_size)
    ]
    if args.num_images % args.batch_size != 0:
        batches.append(args.num_images -
                       (args.num_images // args.batch_size) * args.batch_size)

    for idx, batch_size in enumerate(batches):
        z = [
            F.randn(shape=(batch_size, 512)).data,
            F.randn(shape=(batch_size, 512)).data
        ]

        for i in range(len(z)):
            z[i] = F.div2(
                z[i],
                F.pow_scalar(F.add_scalar(
                    F.mean(z[i]**2., axis=1, keepdims=True), 1e-8),
                    0.5, inplace=True))

        # get latent code
        w = [mapping_network(z[0], outmaps=512, num_layers=8)]
        w += [mapping_network(z[1], outmaps=512, num_layers=8)]

        # truncation trick
        dlatent_avg = nn.parameter.get_parameter_or_create(name="dlatent_avg",
                                                           shape=(1, 512))
        w = [lerp(dlatent_avg, _, 0.7) for _ in w]

        # Load direction
        if not args.face_morph:
            attr_delta = nn.NdArray.from_numpy_array(
                np.load(args.attr_delta_path))
            attr_delta = F.reshape(attr_delta[0], (1, -1))
            w_plus = [w[0] + args.coeff * attr_delta, w[1]]
            w_minus = [w[0] - args.coeff * attr_delta, w[1]]
        else:
            w_plus = [w[0], w[0]]  # content
            w_minus = [w[1], w[1]]  # style

        constant_bc = nn.parameter.get_parameter_or_create(
            name="G_synthesis/4x4/Const/const", shape=(1, 512, 4, 4))
        constant_bc = F.broadcast(constant_bc,
                                  (batch_size, ) + constant_bc.shape[1:])

        gen_plus = synthesis(w_plus, constant_bc, noise_seed=100, mix_after=8)
        gen_minus = synthesis(w_minus,
                              constant_bc,
                              noise_seed=100,
                              mix_after=8)
        gen = synthesis(w, constant_bc, noise_seed=100, mix_after=8)

        image_plus = convert_images_to_uint8(gen_plus, drange=[-1, 1])
        image_minus = convert_images_to_uint8(gen_minus, drange=[-1, 1])
        image = convert_images_to_uint8(gen, drange=[-1, 1])

        # Every batch before this one holds exactly args.batch_size samples,
        # so the offset must use the nominal batch size. Using the current
        # (possibly smaller, final) batch size would reuse earlier indices
        # and overwrite already saved images.
        first_index = idx * args.batch_size
        for j in range(batch_size):
            filepath = os.path.join(args.save_image_path,
                                    f'image_{first_index+j}')
            imsave(f'{filepath}_o.png', image_plus[j], channel_first=True)
            imsave(f'{filepath}_y.png', image_minus[j], channel_first=True)
            imsave(f'{filepath}.png', image[j], channel_first=True)
            print(f"Genetated. Saved {filepath}")
def __init__(self, train=True, shuffle=False, rng=None):
    """Download CIFAR-10 and keep one split (train or test) in memory.

    A lock file inside the nnabla data directory serialises this work
    between processes (e.g. those spawned by mpirun). The lock is released
    in a ``finally`` clause so that a failed download or parse does not
    leave a stale lock behind.

    Args:
        train (bool): If True, concatenate all archive members whose name
            contains "data_batch" (50000 samples); otherwise read the
            member whose name contains "test_batch" (10000 samples).
        shuffle (bool): Forwarded to the parent class constructor.
        rng (numpy.random.RandomState): Random generator stored on the
            instance. Defaults to ``np.random.RandomState(313)``.

    Raises:
        OSError: If the lock file cannot be created for a reason other
            than it already existing.
        Exception: If the lock could not be acquired within 30 minutes.
    """
    super(Cifar10DataSource, self).__init__(shuffle=shuffle)

    # Lock
    lockfile = os.path.join(get_data_home(), "cifar10.lock")
    start_time = time.time()
    while True:  # busy-lock due to communication between process spawn by mpirun
        try:
            fd = os.open(lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR)
            break
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
            if (time.time() - start_time) >= 60 * 30:  # wait for 30min
                raise Exception(
                    "Timeout occured. If there are cifar10.lock in $HOME/nnabla_data, it should be deleted.")
        time.sleep(5)

    try:
        self._train = train
        data_uri = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
        logger.info('Getting labeled data from {}.'.format(data_uri))
        r = download(data_uri)  # file object returned
        try:
            with tarfile.open(fileobj=r, mode="r:gz") as fpin:
                # NOTE: the members are Python pickles. NumPy >= 1.16.3
                # refuses to load them unless allow_pickle=True, and
                # unpickling executes code from the archive, so data_uri
                # must remain a trusted source.
                # Training data
                if train:
                    images = []
                    labels = []
                    for member in fpin.getmembers():
                        if "data_batch" not in member.name:
                            continue
                        fp = fpin.extractfile(member)
                        data = np.load(fp, encoding="bytes",
                                       allow_pickle=True)
                        images.append(data[b"data"])
                        labels.append(data[b"labels"])
                    self._size = 50000
                    self._images = np.concatenate(
                        images).reshape(self._size, 3, 32, 32)
                    self._labels = np.concatenate(labels).reshape(-1, 1)
                # Validation data
                else:
                    for member in fpin.getmembers():
                        if "test_batch" not in member.name:
                            continue
                        fp = fpin.extractfile(member)
                        data = np.load(fp, encoding="bytes",
                                       allow_pickle=True)
                        images = data[b"data"]
                        labels = data[b"labels"]
                    self._size = 10000
                    self._images = images.reshape(self._size, 3, 32, 32)
                    self._labels = np.array(labels).reshape(-1, 1)
        finally:
            r.close()
        # Completion message (previously a verbatim repeat of the start
        # message, which made the log ambiguous).
        logger.info('Getting labeled data from {} done.'.format(data_uri))

        self._size = self._labels.size
        self._variables = ('x', 'y')
        if rng is None:
            rng = np.random.RandomState(313)
        self.rng = rng
        self.reset()
    finally:
        # Unlock (also on failure, otherwise other processes wait for the
        # full timeout on a lock nobody holds).
        os.close(fd)
        os.unlink(lockfile)