def normalize(self, data):
    """Standardize a 4-D array according to ``self.normalize_mode``.

    Supported modes:
      * ``'12'``  -- standardize every 2-D slice over its last two axes.
      * ``'3'``   -- standardize along axis 1 of the
        ``(-1, depth, 2, H, W)`` view of the input, where ``depth`` is
        derived from a fixed voxel count of 192 * 224 * 192.
      * ``'123'`` -- the ``'3'`` stage followed by the ``'12'`` stage.

    Any other mode returns ``data`` untouched.  Divisions by a zero standard
    deviation produce NaN/inf, which ``np.nan_to_num`` replaces with finite
    numbers (NaN becomes 0).
    """
    assert len(data.shape) == 4
    mode = self.normalize_mode

    if mode in ('3', '123'):
        original_shape = data.shape
        rows, cols = data.shape[2], data.shape[3]
        # The view hard-codes 2 channels and a 192*224*192 volume.
        depth = (192 * 224 * 192) // rows // cols
        volumes = data.reshape((-1, depth, 2, rows, cols))
        center = volumes.mean(axis=1, dtype=np.float32, keepdims=True)
        spread = volumes.std(axis=1, dtype=np.float32, keepdims=True)
        data = np.nan_to_num((volumes - center) / spread).reshape(original_shape)

    if mode in ('12', '123'):
        center = data.mean(axis=(2, 3), dtype=np.float32, keepdims=True)
        spread = data.std(axis=(2, 3), dtype=np.float32, keepdims=True)
        data = np.nan_to_num((data - center) / spread)

    return data
def square_plot(data, path):
    """Tile a stack of images into one square mosaic and save it to ``path``.

    Args:
        data: array whose first axis indexes the images, i.e. shape
            ``(N, H, W)`` or ``(N, H, W, ...)``; a list of such arrays is
            concatenated along axis 0 first.
        path: destination handed to ``plt.imsave``.

    Values are min-max scaled to [0, 1].  Missing tiles and the one-pixel
    separator added after each tile are filled with 1.
    """
    if isinstance(data, list):
        data = np.concatenate(data)

    lowest = data.min()
    value_range = data.max() - lowest
    if value_range == 0:
        # Constant input: avoid 0/0 and render a uniform image instead.
        data = np.zeros(data.shape, dtype=float)
    else:
        data = (data - lowest) / value_range

    # Smallest n with n * n >= number of images.
    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = (((0, n ** 2 - data.shape[0]), (0, 1), (0, 1))
               + ((0, 0), ) * (data.ndim - 3))
    data = np.pad(data, padding, mode='constant', constant_values=1)

    # (n*n, H, W, ...) -> (n, n, H, W, ...) -> (n, H, n, W, ...) -> (n*H, n*W, ...)
    tile_axes = (0, 2, 1, 3) + tuple(range(4, data.ndim + 1))
    data = data.reshape((n, n) + data.shape[1:]).transpose(tile_axes)
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
    plt.imsave(path, data, cmap='gray')
def __getitem__(self, index):
    """Load one sample from its HDF5 file and return ``(data, label)``.

    ``self.indices[index]`` supplies ``(directory, file number, label)``;
    the file ``<directory>/<file number>.h5`` must contain a ``'data'``
    dataset that can be viewed as ``(-1, 1, 256, 256)``.  When ``self.cnn``
    is truthy only the last entry along the first axis is kept, giving a
    ``(1, 256, 256)`` sample.

    Returns:
        tuple: float tensor with the data and long tensor with the label.
    """
    dir_name, file_number, label = self.indices[index]
    h5_path = os.path.join(dir_name, '%d.h5' % file_number)
    with h5py.File(h5_path, 'r') as f:
        frames = np.array(f.get('data'))

    frames = frames.reshape(-1, 1, 256, 256)
    if self.cnn:
        # Single-image models only consume the final frame.
        frames = frames[-1, :, :, :]

    sample = torch.from_numpy(frames).float()
    target = torch.tensor(label).long()
    return (sample, target)
def visualize_reordered(self, path, number, shape, permutations):
    """Save the samples together with their re-ordered versions per task.

    The samples returned by ``self.visualize_sample`` are flattened to
    ``(-1, shape[0] * shape[1] * shape[2])``.  For every task index from 1
    to ``self.n_tasks - 1`` the columns are re-ordered with the argsort of
    ``permutations[i]`` and the result is appended below the originals.
    Single-channel data (``shape[2] == 1``) is written with ``save_images``,
    everything else with ``make_samples_batche``.
    """
    flat = self.visualize_sample(path, number, shape)
    flat = flat.reshape(-1, shape[0] * shape[1] * shape[2])
    stacked = deepcopy(flat)
    image_frame_dim = int(np.floor(np.sqrt(number)))

    for task in range(1, self.n_tasks):
        # The sort indices of a permutation form its inverse.
        inverse_permutation = permutations[task].sort()[1]
        reordered = deepcopy(flat.index_select(1, inverse_permutation))
        stacked = torch.cat((stacked, reordered), 0)

    if shape[2] != 1:
        images = stacked.numpy().reshape(number * self.n_tasks,
                                         shape[2], shape[1], shape[0])
        make_samples_batche(images[:self.batch_size], self.batch_size, path)
        return

    images = stacked.numpy().reshape(number * self.n_tasks,
                                     shape[0], shape[1], shape[2])
    n_shown = image_frame_dim * image_frame_dim * self.n_tasks
    save_images(images[:n_shown, :, :, :],
                [self.n_tasks * image_frame_dim, image_frame_dim],
                path)
def train(model, optimizer, epoch, data, target, times):
    """Run ``times`` full-batch MSE optimisation steps on ``model``.

    Args:
        model: module invoked as ``model(data)``.
        optimizer: optimizer stepping the model's parameters.
        epoch: epoch number, only used in the log line.
        data: array-like with ``.shape``/``.reshape``; viewed as
            ``(data.shape[0], -1, 1)`` before being fed to the model.
        target: array-like reshaped the same way as ``data``.
        times: number of optimisation steps to perform.

    Prints every model parameter once, then one loss line per step.
    """
    model.train()
    for parameter in model.parameters():
        print(parameter)
    criterion = nn.MSELoss()

    # The inputs do not change between steps, so convert them once instead of
    # re-wrapping the already converted tensors on every iteration.
    data = Variable(torch.Tensor(data.reshape(data.shape[0], -1, 1)),
                    requires_grad=True)
    target = Variable(torch.Tensor(target.reshape(target.shape[0], -1, 1)),
                      requires_grad=True)

    for batch_idx in range(times):
        output = model(data)
        lost = criterion(output, target)
        optimizer.zero_grad()
        lost.backward()
        optimizer.step()
        print('Train Epoch {} , batch_num :{}, Loss: {:.6f}'.format(
            epoch, batch_idx, lost.item()))
def train(epoch, model, optimizer, device, log_interval, batch_size):
    """Train ``model`` for one epoch on the array stored in ``train_file``.

    The array is loaded with ``np.load``, cut into consecutive batches of
    ``batch_size`` samples (the final batch may be smaller), scaled by
    1/255, permuted from channel-last to channel-first and reshaped to
    ``(-1, 3, img_size, img_size)``.

    Args:
        epoch: epoch number, only used for logging.
        model: called as ``model(batch)``; must return
            ``(recon_batch, mu, logvar)``.
        optimizer: optimizer stepping the model's parameters.
        device: device the batches are moved to.
        log_interval: progress is printed every ``log_interval`` batches.
        batch_size: number of samples per batch.
    """
    model.train()
    train_loss = 0
    data_set = np.load(train_file)
    data_size = len(data_set)
    # Explicit cut points keep working when data_size is not a multiple of
    # batch_size (a section count would raise in that case).
    batches = np.split(data_set, range(batch_size, data_size, batch_size))

    seen = 0
    for batch_idx, batch in enumerate(batches):
        data = torch.from_numpy(batch).float().to(device)
        data /= 255
        data = data.permute([0, 3, 1, 2])
        data = data.reshape([-1, 3, img_size, img_size])
        seen += len(data)

        optimizer.zero_grad()
        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, seen, data_size,
                100. * (batch_idx + 1) / len(batches),
                loss.item() / len(data)))
            print('Loss: ', loss.item() / len(data))

    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / data_size))
def __build_truncated_dataset__(self):
    """Assemble the (optionally index-restricted) dataset as tensors.

    Reads the ``train`` or ``test`` sub-directory of ``self.root`` (chosen
    by ``self.train``) through ``read_data``, stacks the ``'x'`` and ``'y'``
    entries of all users in the order of ``users``, and, when
    ``self.dataidxs`` is not None, keeps only those rows.

    Returns:
        tuple: ``(data, target)`` tensors; ``data`` is viewed as
        ``(-1, 28, 28)``.
    """
    split = 'train' if self.train else 'test'
    users, groups, data = read_data(os.path.join(self.root, split))

    features, targets = [], []
    for user in users:
        record = data[user]
        features.append(np.vstack(record['x']))
        targets.append(np.hstack(record['y']))

    data = np.vstack(features)
    target = np.hstack(targets)

    if self.dataidxs is not None:
        data = data[self.dataidxs]
        target = target[self.dataidxs]

    data = data.reshape(-1, 28, 28)
    return torch.from_numpy(data), torch.from_numpy(target)
def __init__(self, data, T_stim=1, T_wait=5, zero_mean=True, unit_variance=True):
    """Flatten a 4-D recording array and store normalisation statistics.

    Args:
        data: 4-D array; the first axis is used as the class index and the
            last axis is kept as the feature axis.  The original note gives
            the layout as [num_person, num_trials, 50, 28*28] -- TODO confirm.
        T_stim: stored as ``self.T_stim``.
        T_wait: only used to compute ``self.T_tot = T_stim + T_wait``.
        zero_mean: store the global mean of the data, otherwise 0.
        unit_variance: store the global spread of the data, otherwise 255.
    """
    n_classes, n_trials, n_steps, n_features = data.shape
    self.data = data.reshape(-1, n_features)
    # One label per flattened row: class c repeated for all its trials/steps.
    self.label = np.arange(n_classes).repeat(n_trials * n_steps)
    self.T_stim = T_stim
    self.T_tot = T_stim + T_wait
    self.wh = n_features
    self.mean = np.mean(self.data) if zero_mean else 0.
    # NOTE: despite the attribute name this is the standard deviation.
    self.var = np.std(self.data) if unit_variance else 255.
def save_embedding_process(model, save_loader, feed_data, is_cuda):
    """Compute both user embeddings for every user id yielded by a loader.

    Args:
        model: object exposing ``eval()`` and
            ``get_user_embedding(item1, item2)`` returning a pair of tensors.
        save_loader: iterable of tensors holding user indices.
        feed_data: mapping with feature arrays under ``'fts1'`` and
            ``'fts2'``, indexable by an array of user indices.
        is_cuda: move the feature tensors to the GPU before the forward pass.

    Returns:
        tuple: two numpy arrays, the per-batch embeddings concatenated
        along axis 0.

    Raises:
        ValueError: from ``np.concatenate`` when the loader is empty.
    """
    fts1 = feed_data['fts1']
    fts2 = feed_data['fts2']
    user_embedding1_list = []
    user_embedding2_list = []

    model.eval()
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for data in save_loader:
            user_ids = data.reshape([-1]).numpy()
            v_item1 = torch.FloatTensor(fts1[user_ids])
            v_item2 = torch.FloatTensor(fts2[user_ids])
            if is_cuda:
                v_item1 = v_item1.cuda()
                v_item2 = v_item2.cuda()

            res = model.get_user_embedding(v_item1, v_item2)
            # .cpu() is a no-op for CPU tensors, so one path serves both cases.
            user_embedding1_list.append(res[0].detach().cpu().numpy())
            user_embedding2_list.append(res[1].detach().cpu().numpy())

    return (np.concatenate(user_embedding1_list, axis=0),
            np.concatenate(user_embedding2_list, axis=0))
def load_mnist_images(filename):
    """Read a gzip-compressed image file into a float array scaled by 1/255.

    The file is fetched with ``download`` when it does not exist yet.  The
    first 16 bytes are skipped, the remaining bytes are interpreted as
    uint8 pixels of 28x28 single-channel images, and the two spatial axes
    of every image are swapped.

    Returns:
        Array of shape ``(N, 1, 28, 28)``.
    """
    if not os.path.exists(filename):
        download(filename)

    with gzip.open(filename, 'rb') as f:
        raw = f.read()

    pixels = np.frombuffer(raw, np.uint8, offset=16)
    images = pixels.reshape(-1, 1, 28, 28).swapaxes(2, 3)
    return images / np.float32(255)
def test(model, epoch, loader, isPrint=False):
    """Evaluate ``model`` on ``loader`` and print the average loss.

    Args:
        model: called as ``model(data, one_hot_label)``; must return
            ``(recon_batch, mu, logvar)``.
        epoch: epoch number, used in the reconstruction file name.
        loader: iterable of ``(data, label)`` batches exposing ``.dataset``.
        isPrint: when True, save originals and reconstructions of the first
            batch (at most 8 samples) under ``results/cvae_2stage/``.
    """
    model.eval()
    test_loss = 0
    makedirs('results/cvae_2stage/')
    with torch.no_grad():
        for i, (data, label) in enumerate(loader):
            data = data.to(device)
            label = idx2onehot(label, 10).to(device)
            recon_batch, mu, logvar = model(data, label)
            # Number of features per sample after flattening.
            dim_x = data.reshape(data.shape[0], -1).shape[1]
            test_loss += loss_function(recon_batch, data, mu, logvar, dim_x).item()

            # Show the original images next to the VAE reconstructions.
            if i == 0 and isPrint:
                n = min(data.size(0), 8)
                side = int(math.sqrt(dim_x))
                # Infer the batch dimension so that a first batch smaller
                # than the configured batch size still works.
                comparison = torch.cat([
                    data[:n],
                    recon_batch.view(-1, 1, side, side)[:n]
                ])
                save_image(comparison.cpu(),
                           'results/cvae_2stage/reconstruction_' + str(epoch) + '.png',
                           nrow=n)

    test_loss /= len(loader.dataset)
    print('====> Test set loss: {:.4f}'.format(test_loss))
def train(model, optimizer, epoch, loader):
    """Train ``model`` for one epoch over ``loader`` and log the loss.

    Each batch yields ``(data, label)``; per the original note the shapes
    are e.g. ``[128, 1, 28, 28]`` and ``[128]`` with labels in 0..9.  Labels
    are one-hot encoded to 10 classes before being passed to the model.
    """
    model.train()
    running_loss = 0
    for batch_idx, (data, label) in enumerate(loader):
        # Important!! detach both tensors in place before use.
        data.detach_()
        label.detach_()

        data = data.to(device)
        label = idx2onehot(label, 10).to(device)  # e.g. [128, 10]

        optimizer.zero_grad()
        recon_batch, mu, logvar = model(data, label)
        # Number of features per sample after flattening.
        dim_x = data.reshape(data.shape[0], -1).shape[1]
        loss = loss_function(recon_batch, data, mu, logvar, dim_x)
        loss.backward()
        running_loss += loss.item()
        optimizer.step()

        if batch_idx % args.log_interval != 0:
            continue
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(data), len(loader.dataset),
            100. * batch_idx / len(loader),
            loss.item() / len(data)))

    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, running_loss / len(loader.dataset)))
def readbcn(file):
    """Read a raw binary file of native float32 values as an ``(n, 7)`` tensor.

    The file is interpreted as 7 consecutive rows of equal length; the
    returned tensor is the transpose of that layout.  Trailing bytes that do
    not form a complete 4-byte value are ignored.

    Raises:
        ValueError: if the number of values is not a multiple of 7.
    """
    npoints = os.path.getsize(file) // 4
    # Read straight into an array instead of unpacking every value into a
    # Python float first.
    data = np.fromfile(file, dtype=np.float32, count=npoints)
    data = data.reshape(7, len(data) // 7)
    return torch.from_numpy(data.T)
def __getitem__(self, index):
    """Return the grayscale image at ``index`` and its one-hot label.

    The class id is parsed from the character at position -7 of the image
    path.  The image is returned as a float32 tensor of shape
    ``(1, imageHeight, imageWidth)``, the label as a float32 numpy vector of
    length 10.
    """
    image_path = self.data[index]
    pixels = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    one_hot = np.zeros(10, dtype=np.float32)
    class_id = int(image_path[-7])
    one_hot[class_id] = 1

    image = pixels.reshape(1, imageHeight, imageWidth).astype(np.float32)
    return t.from_numpy(image), one_hot
def __init__(self, data_path, label_path, config, mode):
    """Load gzip-compressed images and labels and select one split.

    Args:
        data_path: gzip file with a 16-byte header followed by uint8 pixels.
        label_path: gzip file with an 8-byte header followed by uint8 labels.
        config: object providing ``image_size``, ``num_channels``,
            ``pixel_depth``, ``train_image_nums`` and ``test_image_nums``.
        mode: ``'train'`` (first 70% of the training images), ``'valid'``
            (the remainder) or ``'test'``.

    Raises:
        ValueError: for any other ``mode``.
    """
    self.config = config
    self.mode = mode
    if mode == 'test':
        self.image_nums = config.test_image_nums
    elif mode in ('train', 'valid'):
        self.image_nums = config.train_image_nums
    else:
        raise ValueError('Error Mode.')

    n_bytes = (config.image_size * config.image_size
               * self.image_nums * config.num_channels)
    with gzip.open(data_path) as bytestream:
        bytestream.read(16)  # skip the header
        buf = bytestream.read(n_bytes)
    data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
    # Rescale the pixel values to [-0.5, 0.5].
    data = (data - (config.pixel_depth / 2.0)) / config.pixel_depth
    self.data = data.reshape(self.image_nums, config.image_size,
                             config.image_size, config.num_channels)

    with gzip.open(label_path) as bytestream:
        bytestream.read(8)  # skip the header
        buf = bytestream.read(1 * self.image_nums)
    self.labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)

    if mode in ('train', 'valid'):
        split = int(config.train_image_nums * 0.7)
        if mode == 'train':
            self.data = self.data[:split]
            self.labels = self.labels[:split]
        else:
            self.data = self.data[split:]
            self.labels = self.labels[split:]
        # Keep the count an int that matches the number of samples actually
        # held (it used to be a float such as n * 0.7).
        self.image_nums = len(self.data)
def reshape(self, data, label, labels=None):
    """Cut sequences into fixed-length chunks and align the labels.

    ``data`` of shape ``(batch, time, features)`` is truncated to a whole
    number of ``self.sequence_length`` windows and rearranged chunk-major
    into ``(n_chunks * batch, sequence_length, features)``.

    * ``self.mse`` falsy: ``label`` holds one value per sequence; it is
      repeated for every chunk and time step and returned as a long tensor
      of shape ``(n_chunks * batch, sequence_length)``.
    * ``self.mse`` truthy: ``label`` is 3-D and chunked exactly like
      ``data``; returned as a float tensor.

    ``labels`` is accepted but not used.

    Returns:
        tuple: ``(data, label)`` tensors.
    """
    assert data.shape[0] == label.shape[0]
    seq_len = self.sequence_length
    n_chunks = data.shape[1] // seq_len
    usable = n_chunks * seq_len

    def _chunk_major(array):
        # (B, T, F) -> (B, n_chunks, L, F) -> chunks stacked chunk-first.
        windows = array[:, :usable, :]
        windows = windows.reshape(windows.shape[0], -1, seq_len, windows.shape[2])
        return np.concatenate(windows.transpose(1, 0, 2, 3), axis=0)

    data = torch.Tensor(_chunk_major(data)).float()

    if self.mse:
        label = torch.Tensor(_chunk_major(label)).float()
    else:
        per_chunk = np.repeat(label, n_chunks).reshape(label.shape[0], -1)
        per_chunk = np.concatenate(np.transpose(per_chunk))
        per_step = np.repeat(per_chunk, seq_len).reshape(-1, seq_len)
        label = torch.Tensor(per_step).long()

    return data, label
def make_and_save_volume_grid(volume: np.array, path: str) -> None:
    """Render every volume as a 3-D scatter plot and save the renders as a grid.

    ``volume`` is indexed along its first axis; each entry is divided by
    255, extended with an all-ones alpha channel on axis 3 and sampled at
    every third voxel.  The rendered RGB figures are handed to
    ``make_and_save_image_grid`` together with ``path``.
    """
    skip_factor = 3
    alpha = np.ones(volume.shape[1:4] + (1, ), dtype=np.float32)

    images = []
    for raw_volume in volume:
        rgba = np.concatenate((raw_volume / 255, alpha), axis=3)

        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')

        # Coordinates of the sub-sampled voxels along the three axes.
        axis_points = [list(range(0, rgba.shape[axis], skip_factor))
                       for axis in range(3)]
        x, y, z = (grid.flatten() for grid in np.meshgrid(*axis_points))
        colors = rgba[x, y, z, :].reshape((-1, 4))
        ax.scatter(x, y, z, c=colors, marker='o')

        fig.canvas.draw()
        pixels = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
        # get_width_height() is (width, height); images are (height, width, 3).
        height_width = fig.canvas.get_width_height()[::-1]
        images.append(pixels.reshape(height_width + (3, )))
        plt.close()

    make_and_save_image_grid(np.array(images), path, 0)
def time_series_to_plot(time_series_batch, dpi=35, feature_idx=0, n_images_per_row=4, titles=None):
    """Convert a batch of time series to a tensor with a grid of their plots

    Args:
        time_series_batch (Tensor): (batch_size, seq_len, dim) tensor of time series
        dpi (int): dpi of a single image
        feature_idx (int): index of the feature that goes in the plots (the first one by default)
        n_images_per_row (int): number of images per row in the plot
        titles (list of strings): list of titles for the plots

    Output:
        single image tensor produced by ``vutils.make_grid`` from the
        (batch, channels, height, width) stack of rendered plots
    """
    # Iterates over the time series
    images = []
    for i, series in enumerate(time_series_batch.detach()):
        fig = plt.figure(dpi=dpi)
        ax = fig.add_subplot(1, 1, 1)
        if titles:
            ax.set_title(titles[i])
        ax.plot(series[:, feature_idx].numpy())  # plots a single feature of the time series
        fig.canvas.draw()
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
        # get_width_height() is (width, height); the image is (height, width, 3).
        data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
        images.append(data)
        plt.close(fig)

    # Swap channel; np.stack copies, so the tensor owns writable memory.
    images = torch.from_numpy(np.stack(images)).permute(0, 3, 1, 2)
    # Make grid
    grid_image = vutils.make_grid(images.detach(), nrow=n_images_per_row)
    return grid_image
def _read_binary_file(fname, dim):
    """Load a flat float32 file as a ``(dim, n_frames)`` array.

    The values are grouped into consecutive frames of ``dim`` values each
    and the result is transposed so that frames run along the second axis.

    Returns:
        tuple: ``(array, n_frames)``.
    """
    with open(fname, 'rb') as fid:
        flat = np.fromfile(fid, dtype=np.float32)
    assert flat.shape[0] % dim == 0.0
    frames = flat.reshape(-1, dim).T
    return frames, frames.shape[1]
def forward(self, input):
    """Run the network with one candidate op selected per searchable block.

    When ``self.is_train_weight`` is truthy, one candidate index per entry
    of ``self.architecture_parameter`` is sampled from the softmax of that
    entry, so a different path can be trained on every batch.  Otherwise the
    fixed ``self.index_candidate_block`` is used for inference.

    Side effects: stores ``self.batch_size`` and ``self.logits``.

    Returns:
        The classifier output for the globally average-pooled features.
    """
    if self.is_train_weight:
        # Explicit dim avoids the deprecated implicit-dimension softmax.
        softmax = nn.Softmax(dim=-1)
        index_candidate_block = []
        for i in self.architecture_parameter:
            output = softmax(torch.Tensor(i))
            index_candidate_block.append(
                output.multinomial(1).numpy().tolist()[0])
    else:
        index_candidate_block = self.index_candidate_block

    batch_size = input.size()[0]
    self.batch_size = batch_size
    data = self._input_conv(input)

    layer_of_block = 0
    for l_idx in range(self._input_conv_count, len(self._blocks)):
        block = self._blocks[l_idx]
        # Searchable blocks are stored as lists; the first non-list entry
        # ends the searchable part of the network.
        if not isinstance(block, list):
            break
        data = self._ops[layer_of_block][
            index_candidate_block[layer_of_block]](data)
        layer_of_block += 1

    data = self._output_conv(data)
    # Global average pooling over the full spatial extent.
    data = nn.functional.avg_pool2d(data, data.size()[2:])
    data = data.reshape((batch_size, -1))
    self.logits = self.classifier(data)
    return self.logits
def load_images(self, process_once=True):
    """Process training data and save as images. Overwrite for each new dataset.

    Loads the five CIFAR-10 training batches, converts the images to
    channel-last ``(32, 32, 3)`` layout and splits them into a training and
    a validation part.

    Args:
        process_once: when truthy (and the validation index file does not
            exist) the validation indices are read from that file;
            otherwise each sample goes to the validation part with
            probability ~0.2 and the chosen indices are written out as JSON.

    Returns:
        tuple: ``(train_data, train_labels, val_data, val_labels)`` where
        the data entries are lists of image arrays and the labels are
        integer arrays.
    """
    # for cifar10
    # NOTE(review): an existing index file switches to a *fresh random*
    # split (written to a different path below), while a missing file leads
    # to reading it -- this looks inverted; confirm the intended behaviour.
    val_index_file = '/home/michal5/cs498_finalproject/cifar10_val_data.txt'
    if os.path.exists(val_index_file):
        process_once = False

    subfolders = [
        'data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4',
        'data_batch_5'
    ]
    batches = []
    labels = []
    for folder in subfolders:
        dictionary = unpickle('/data/common/' + 'cifar-10-batches-py' + '/' + folder)
        batches.append(dictionary[b'data'])
        labels += dictionary[b'labels']

    data = np.concatenate(batches)
    data = data.reshape((50000, 3, 32, 32))
    data = data.transpose((0, 2, 3, 1))
    labels = np.asarray(labels)

    val_values = set()
    if process_once:
        with open(val_index_file) as f:
            # A set gives O(1) membership tests in the loop below.
            val_values = set(json.load(f))

    val_dict = []
    train_data = []
    train_labels = []
    val_data = []
    val_labels = []
    for i, label in enumerate(labels):
        if process_once:
            goes_to_val = i in val_values
        else:
            goes_to_val = random.random() > 0.8

        if goes_to_val:
            val_data.append(data[i])
            val_labels.append(label)
            if not process_once:
                val_dict.append(i)
        else:
            train_data.append(data[i])
            # Labels belong in train_labels (they used to be appended to
            # train_data on the process_once path).
            train_labels.append(label)

    if not process_once:
        file_name = '/shared/rsaas/michal5/classes/498_dl/cs498_finalproject/' + 'cifar10_val_data' + '.txt'
        with open(file_name, 'w+') as image_val:
            json.dump(val_dict, image_val)

    # np.int was removed from NumPy; the builtin int is the same dtype.
    train_labels = np.asarray(train_labels, dtype=int)
    val_labels = np.asarray(val_labels, dtype=int)
    return train_data, train_labels, val_data, val_labels
def PCA(self, img):
    """Apply a random PCA-based colour shift to a 3-channel image.

    The image is standardized per channel, the 3x3 covariance of the
    standardized pixels is eigen-decomposed, and a per-channel offset built
    from the eigenvalues, the eigenvectors and Gaussian noise
    (``N(0, 0.1)``, one draw per component) is added before the
    standardization is undone.

    Args:
        img: array of shape ``(H, W, 3)``.

    Returns:
        uint8 array of the same shape, clipped to [0, 255].
    """
    img_avg = np.average(img, axis=(0, 1))
    img_std = np.std(img, axis=(0, 1))
    img_norm = (img - img_avg) / img_std

    # Covariance of the standardized pixels as one matrix product instead of
    # a Python loop over every pixel.
    pixels = img_norm.reshape(-1, 3)
    img_cov = pixels.T @ pixels / len(pixels)

    eig_values, eig_vectors = np.linalg.eig(img_cov)
    alphas = np.random.normal(0, 0.1, 3)
    # One offset per channel, broadcast over all pixels.
    channel_shift = np.sum((eig_values + alphas) * eig_vectors, axis=1)

    img_reconstruct = (img_norm + channel_shift) * img_std + img_avg
    return np.clip(img_reconstruct, 0, 255).astype(np.uint8)
def __getitem__(self, index):
    """Load the sample and label stored at position ``index``.

    Both ``self.x_train[index]`` and ``self.y_train[index]`` are paths read
    with ``np.load``; the sample is viewed as ``(3, 128, 64, 64)``.

    Returns:
        tuple: ``(data, label)`` numpy arrays.
    """
    label = np.load(self.y_train[index])
    sample = np.load(self.x_train[index])
    return sample.reshape(3, 128, 64, 64), label
def transform(self, data):
    """Scale rows to [-1, 1] and view each one as a square one-channel image.

    Values are mapped linearly from ``[self.minn, self.maxx]`` to
    ``[-1, 1]``.  Rows shorter than ``self.height ** 2`` are right-padded
    with zeros before being reshaped to ``(-1, 1, height, height)``.
    """
    scaled = data.copy().astype('float32')
    scaled = (scaled - self.minn) / (self.maxx - self.minn) * 2 - 1

    n_cells = self.height * self.height
    n_features = len(scaled[0])
    if n_cells > n_features:
        filler = np.zeros((len(scaled), n_cells - n_features))
        scaled = np.concatenate([scaled, filler], axis=1)

    return scaled.reshape(-1, 1, self.height, self.height)
def data_transfrom(self, data, other):
    """Turn a raw sample into a normalized float tensor.

    In training mode (``self.train`` truthy) ``other[0]`` holds the original
    3-D shape as packed uint16 values; the sample is reshaped to it and a
    random 224x224 window is cropped from its last two axes.  Otherwise the
    sample is viewed as ``(3, 224, 224)``.  In both cases the module-level
    ``mean`` and ``std`` are then applied.

    Args:
        data: numpy array with the sample values.
        other: sequence whose first element is the packed shape (only read
            in training mode).

    Returns:
        ``torch.Tensor`` with the normalized sample.
    """
    data = data.astype(np.float32)
    if self.train:
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        shape = tuple(int(s) for s in np.frombuffer(other[0], np.uint16))
        data = data.reshape(shape)
        # Random crop.  randint's upper bound is exclusive, so "+ 1" makes
        # the last valid offset reachable and supports inputs that are
        # exactly 224 wide/high.
        _, w, h = data.shape
        x1 = np.random.randint(0, w - 224 + 1)
        y1 = np.random.randint(0, h - 224 + 1)
        data = data[:, x1:x1 + 224, y1:y1 + 224]
        # TODO: horizontal flip
    else:
        data = data.reshape([3, 224, 224])
    data = (data - mean) / std
    return torch.Tensor(data)
def readbcn(file):
    """Read a raw binary file of native float32 values as an ``(n, 7)`` tensor.

    The file is interpreted as 7 consecutive rows of equal length; the
    returned tensor is the transpose of that layout.  Trailing bytes that do
    not form a complete 4-byte value are ignored.

    Raises:
        ValueError: if the number of values is not a multiple of 7.
    """
    npoints = os.path.getsize(file) // 4
    # Read straight into an array instead of unpacking every value into a
    # Python float first.
    data = np.fromfile(file, dtype=np.float32, count=npoints)
    data = data.reshape(7, len(data) // 7)
    return torch.from_numpy(data.T)
def __init__(self, unique_section_names, target_dir, mode):
    """Build the training features and section labels for all sections.

    For every section name the training files are listed with
    ``util.file_list_generator``, their feature vectors are extracted with
    ``concat_features`` and stacked; every vector is labelled with the index
    of the section its file belongs to.

    Args:
        unique_section_names: array of section names (its ``shape[0]`` is
            the number of sections).
        target_dir: directory passed on to the file-list generator.
        mode: passed on to the file-list generator.

    Sets ``self.data`` of shape ``(n_vectors, 1, n_frames, n_mels)`` and
    ``self.labels`` of shape ``(n_vectors,)``.
    """
    super().__init__()

    n_frames = CONFIG["feature"]["n_frames"]
    n_mels = CONFIG["feature"]["n_mels"]

    n_files_ea_section = []  # number of files for each section
    n_vectors_ea_file = []  # number of vectors for each file
    # Start from an empty float array so dtype and width match the previous
    # behaviour even when no section yields features.
    feature_chunks = [numpy.empty((0, n_frames * n_mels), dtype=float)]

    for section_name in unique_section_names:
        # get file list for each section
        # all values of y_true are zero in training
        print("target_dir : %s" % (target_dir + "_" + section_name))
        files, _ = util.file_list_generator(
            target_dir=target_dir,
            section_name=section_name,
            dir_name="train",
            mode=mode,
        )
        print("number of files : %s" % (str(len(files))))
        n_files_ea_section.append(len(files))

        # extract features from the audio files of this section
        features, n_features = concat_features(files)
        feature_chunks.append(features)
        n_vectors_ea_file.append(n_features)

    # Concatenate once instead of re-copying the array for every section.
    data = numpy.concatenate(feature_chunks, axis=0)
    n_vectors_ea_file = flatten(n_vectors_ea_file)

    # make target labels for conditioning
    # they are not one-hot vector!
    labels = numpy.zeros((data.shape[0]), dtype=int)
    start_index = 0
    # Position in the flattened per-file list.  It must keep counting across
    # sections; restarting at 0 would reuse the first section's counts.
    # (Assumes the flattened list holds one entry per file in section
    # order -- confirm against concat_features/flatten.)
    file_index = 0
    for section_index in range(unique_section_names.shape[0]):
        for _ in range(n_files_ea_section[section_index]):
            n_vectors = n_vectors_ea_file[file_index]
            labels[start_index:start_index + n_vectors] = section_index
            start_index += n_vectors
            file_index += 1

    # 1D vector to 2D image (1ch)
    self.data = data.reshape(
        (
            data.shape[0],
            1,  # number of channels
            n_frames,
            n_mels,
        )
    )
    self.labels = labels
def load_images(path):
    """Read a gzip-compressed image file into a float32 matrix.

    The 16-byte header is decoded as four big-endian 32-bit integers
    ``(magic, n, x, y)``; the following ``n * x * y`` bytes are the uint8
    pixel values.

    Returns:
        float32 array of shape ``(n, x * y)``, one flattened image per row.
    """
    with gzip.open(path) as stream:
        # read meta information
        _magic, count, rows, cols = np.frombuffer(stream.read(16), dtype='>i4')
        # read data
        raw = stream.read(rows * cols * count)

    pixels = np.frombuffer(raw, dtype='>u1').astype(np.float32)
    return pixels.reshape(count, rows * cols)
def sample_img(data, n: int, n_total_keys: int) -> np.ndarray:
    """Draw random flattened frames from a 4-D array, scaled by 1/255.

    ``data`` of shape ``(N, T, H, W)`` is treated as ``N * T`` frames of
    ``H * W`` values.  At most ``n`` frames are drawn without replacement;
    the number is reduced (with a warning) so that roughly
    ``n_samples * n_total_keys * H * W * 8`` stays within
    ``MAX_MEMORY_BYTES`` (the factor 8 presumably being the bytes per
    float64 value).

    Returns:
        float64 array of shape ``(n_samples, H * W)``.
    """
    N, T, H, W = data.shape
    memory_cap = 1 + int(MAX_MEMORY_BYTES / (n_total_keys * H * W * 8))
    n_samples = min(memory_cap, n)
    if n_samples < n:
        logging.warning('reduced n_samples from %d to %d '
                        'due to memory limit', n, n_samples)

    ind = np.random.permutation(N * T)[:n_samples]
    frames = data.reshape((N * T, H * W))
    picked = np.copy(frames[ind])
    return picked.astype(np.float64) / 255
def correction_data(self, data):
    """Re-assemble per-slice data into volumes of shape (3, 192, 224, 192).

    ``data`` is a 4-D stack of 3-channel slices whose slice shape depends on
    ``self.data_mode``:

      * ``'XY'`` -- slices of ``(3, 224, 192)``, 192 per volume
      * ``'ZY'`` -- slices of ``(3, 192, 192)``, 224 per volume
      * ``'ZX'`` -- slices of ``(3, 192, 224)``, 192 per volume

    Returns:
        Array of shape ``(n_volumes, 3, 192, 224, 192)``; for any other
        mode ``data`` is returned unchanged.
    """
    assert len(data.shape) == 4
    channels = 3
    # mode -> (expected slice shape, stacked shape, axis order)
    layouts = {
        'XY': ((channels, 224, 192), (-1, 192, channels, 224, 192), (0, 2, 1, 3, 4)),
        'ZY': ((channels, 192, 192), (-1, 224, channels, 192, 192), (0, 2, 3, 1, 4)),
        'ZX': ((channels, 192, 224), (-1, 192, channels, 192, 224), (0, 2, 3, 4, 1)),
    }
    layout = layouts.get(self.data_mode)
    if layout is None:
        return data

    slice_shape, stacked_shape, axis_order = layout
    assert data.shape[1:] == slice_shape
    return data.reshape(stacked_shape).transpose(axis_order)