def normalize_min_max(self, data):
    """Min-max scale ``data`` to ``[0, 1]`` along axis 0.

    Parameters
    ----------
    data : array-like
        Values to scale; converted with ``np.array``.

    Returns
    -------
    numpy.ndarray
        ``(data - min) / (max - min)`` computed per position along axis 0.
        Positions whose max equals their min are divided by 1 instead, so
        they come out as 0 rather than NaN.
    """
    data = np.array(data)
    lowest = data.min(axis=0)
    span = data.max(axis=0) - lowest
    # np.where (instead of in-place masking) also works when ``span`` is a
    # NumPy scalar, which is what 1-D input produces.
    span = np.where(span == 0, 1, span)
    return (data - lowest) / span
def __init__(self, data, ground_truth, semi=False):
    """Store the globally min-max scaled data and the candidate pixel indices.

    Parameters
    ----------
    data :
        Array exposing ``min()``/``max()``; scaled to ``[0, 1]`` using its
        global minimum and maximum.
    ground_truth :
        Label map. ``np.nonzero`` of it is unpacked into two index arrays,
        so it has to be 2-D.
    semi : bool
        If true, also keep positions within a 50-pixel disk dilation of the
        labelled (``> 0``) area.
    """
    super(HyperX, self).__init__()
    # Normalize the data in [0,1]; a constant input maps to zeros instead of
    # dividing by zero.
    lowest = data.min()
    span = data.max() - lowest
    if span == 0:
        span = 1
    self.data = (data - lowest) / span
    self.gt = ground_truth
    self.n_classes = len(np.unique(ground_truth))
    if semi:
        # Semi-supervision, include neighbours at 50px
        mask = morphology.dilation(ground_truth > 0, morphology.disk(50))
        x_pos, y_pos = np.nonzero(mask)
    else:
        x_pos, y_pos = np.nonzero(ground_truth)
    self.indices = list(zip(x_pos, y_pos))
def square_plot(data, path):
    """Tile a stack of images into a near-square grid and save it as grayscale.

    Parameters
    ----------
    data : numpy.ndarray or list of numpy.ndarray
        Image stack indexed along axis 0; a list is concatenated first.
    path :
        Destination passed to ``plt.imsave``.
    """
    if isinstance(data, list):
        data = np.concatenate(data)

    # Scale to [0, 1]; a constant stack maps to zeros instead of NaN.
    lowest = data.min()
    span = data.max() - lowest
    if span == 0:
        span = 1
    data = (data - lowest) / span

    # Smallest grid side that fits every image.
    n = int(np.ceil(np.sqrt(data.shape[0])))
    # Fill the grid with blank tiles and add a one-pixel gap after each tile;
    # any trailing axes are left unpadded.
    padding = (((0, n ** 2 - data.shape[0]), (0, 1), (0, 1))
               + ((0, 0),) * (data.ndim - 3))
    data = np.pad(data, padding, mode='constant', constant_values=1)

    # (n, n, h, w, ...) -> (n, h, n, w, ...) -> (n*h, n*w, ...)
    tiled_axes = (0, 2, 1, 3) + tuple(range(4, data.ndim + 1))
    data = data.reshape((n, n) + data.shape[1:]).transpose(tiled_axes)
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
    plt.imsave(path, data, cmap='gray')
def normalize(attr_data):
    """Flatten a nested iterable and min-max scale it to ``[0, 1]``.

    Parameters
    ----------
    attr_data : iterable of iterables
        Values to flatten one level and scale with the global min and max.

    Returns
    -------
    numpy.ndarray
        1-D ``float32`` array. Empty input gives an empty array, and input
        whose values are all equal gives zeros.
    """
    data = np.array([value for item in attr_data for value in item])
    if data.size == 0:
        # min()/max() are undefined for an empty array.
        return data.astype(np.float32)
    data_min = data.min()
    span = data.max() - data_min
    if span == 0:
        span = 1
    return ((data - data_min) / span).astype(np.float32)
def normalize(data, old_min=None, old_max=None, new_min=0, new_max=1, dim='time'):
    """Rescale ``data`` with ``minmax_scaler`` and return it with the bounds used.

    Parameters
    ----------
    data :
        Labelled array exposing ``dims``, ``stack``, ``unstack``, ``min``,
        ``max`` and ``values`` (presumably an xarray object -- confirm).
    old_min, old_max :
        Source range. When ``old_min`` is ``None`` both bounds are computed
        from ``data`` along ``dim``; when only ``old_max`` is ``None`` it is
        computed on its own.
    new_min, new_max :
        Target range forwarded to ``minmax_scaler``.
    dim : str
        Dimension along which the bounds are computed.

    Returns
    -------
    tuple
        ``(data.unstack(), old_min, old_max)``.
    """
    if 'time' in data.dims:
        # get year and month as separate dimension
        data = unstack_month_and_year(data)
    if dim == 'time':
        data = data.stack(time=['year', 'month'])
    if old_min is None:
        old_min = data.min(dim=dim)
        old_max = data.max(dim=dim)
    elif old_max is None:
        # Previously left as None when only old_min was supplied.
        old_max = data.max(dim=dim)
    data.values = np.float32(
        minmax_scaler(data,
                      old_min=old_min,
                      new_min=new_min,
                      old_max=old_max,
                      new_max=new_max))
    return data.unstack(), old_min, old_max
def __normalize(self, data):
    """Scale each position along axis 0 to ``[-1, 1]``.

    Parameters
    ----------
    data : numpy.ndarray
        Values to scale; min and max are taken along axis 0.

    Returns
    -------
    numpy.ndarray
        ``((data - min) / (max - min) - 0.5) / 0.5``. Positions whose max
        equals their min are divided by 1, so they map to -1 rather than NaN.
    """
    lowest = data.min(axis=0, keepdims=True)
    span = data.max(axis=0, keepdims=True) - lowest
    # keepdims=True keeps ``span`` an array, so it can be masked in place.
    span[span == 0] = 1
    unit = (data - lowest) / span
    return (unit - 0.5) / 0.5
def experiment_gan_plot(data, samples, title, ax, is_spiral=False):
    """Draw real and generated samples on ``ax``.

    With ``is_spiral`` the first two columns of each array are scattered.
    Otherwise each array gets a filled density curve from ``kde`` evaluated
    on a 1000-point grid spanning that array's own min and max.
    """
    series = ((data, 'real'), (samples, 'fake'))
    if not is_spiral:
        kernel_width = 0.1
        grids = [np.linspace(values.min(), values.max(), 1000)
                 for values, _ in series]
        densities = [kde(values, grid, bandwidth=kernel_width)
                     for (values, _), grid in zip(series, grids)]
        for (_, tag), grid, density in zip(series, grids, densities):
            ax.fill_between(x=grid, y1=density, y2=0, alpha=0.7, label=tag)
    else:
        for points, tag in series:
            ax.scatter(points[:, 0], points[:, 1], label=tag)

    ax.legend(prop={'size': 40})
    ax.grid()
    ax.set_title(title, fontsize=45)
    for axis_name in ("x", "y"):
        ax.tick_params(axis=axis_name, labelsize=40)
def transform_inputs(loader, batch_size=batch_size):
    """Encode every batch of ``loader`` with ``model.predict`` and re-wrap it.

    Parameters
    ----------
    loader :
        Iterable yielding ``(data, label)`` batches.
    batch_size : int
        Batch size of the returned loader.

    Returns
    -------
    torch.utils.data.DataLoader
        Shuffled loader over ``(encoding, label)`` pairs, one pair per index
        along axis 1 of the prediction.
    """
    encoded_inputs = []
    labels = []
    with torch.no_grad():
        for data, label in tqdm(loader):
            data = data.squeeze().transpose(0, 1)
            # Per-batch min-max scaling; a constant batch maps to zeros
            # instead of dividing by zero.
            lowest = data.min().item()
            span = data.max().item() - lowest
            if span == 0:
                span = 1
            data = (data - lowest) / span
            h = model.predict(data)
            for i in range(h.shape[1]):
                encoded_inputs.append(h[:, i, :].flatten().numpy())
                labels.append(label[i].item())
    dataset = torch.utils.data.TensorDataset(torch.Tensor(encoded_inputs),
                                             torch.Tensor(labels))
    return torch.utils.data.DataLoader(dataset,
                                       batch_size=batch_size,
                                       shuffle=True)
def bytescale(data, high=255, low=0, cmin=None, cmax=None):
    """
    Byte scales an array (image).

    Byte scaling means converting the input image to uint8 dtype and scaling
    the range to ``(low, high)`` (default 0-255).
    If the input image already has dtype uint8, no scaling is done.

    Parameters
    ----------
    data : ndarray
        Image data array.
    high : scalar, optional
        Scale max value to `high`.  Default is 255.
    low : scalar, optional
        Scale min value to `low`.  Default is 0.
    cmin : scalar, optional
        Bias scaling of small values. Default is ``data.min()``.
    cmax : scalar, optional
        Bias scaling of large values. Default is ``data.max()``.

    Returns
    -------
    img_array : uint8 ndarray
        The byte-scaled array.

    Raises
    ------
    ValueError
        If ``high > 255``, ``low < 0``, ``high < low`` or ``cmax < cmin``.

    Examples
    --------
    >>> img = np.array([[ 91.06794177,   3.39058326,  84.4221549 ],
    ...                 [ 73.88003259,  80.91433048,   4.88878881],
    ...                 [ 51.53875334,  34.45808177,  27.5873488 ]])
    >>> bytescale(img)
    array([[255,   0, 236],
           [205, 225,   4],
           [140,  90,  70]], dtype=uint8)
    >>> bytescale(img, high=200, low=100)
    array([[200, 100, 192],
           [180, 188, 102],
           [155, 135, 128]], dtype=uint8)
    >>> bytescale(img, cmin=0, cmax=255)
    array([[91,  3, 84],
           [74, 81,  5],
           [52, 34, 28]], dtype=uint8)

    """
    if data.dtype == np.uint8:
        return data

    if high > 255 or low < 0 or high < low:
        raise ValueError("check high low values")

    if cmin is None:
        cmin = data.min()
    if cmax is None:
        cmax = data.max()

    cscale = cmax - cmin
    if cscale < 0:
        raise ValueError("`cmax` should be larger than `cmin`.")
    if cscale == 0:
        # Flat input: avoid dividing by zero.
        cscale = 1

    scale = float(high - low) / cscale
    bytedata = (data - cmin) * scale + low
    # Adding 0.5 before the truncating cast rounds to the nearest integer.
    return (bytedata.clip(low, high) + 0.5).astype(np.uint8)
def df_bold_min(data):
    '''
    Bold the minimum in a Series or DataFrame.

    Usage:
        `df.style.apply(df_bold_min)`

    Any ``%`` characters are stripped and the values cast to float before
    comparing. A Series yields a list of CSS strings (one per element); a
    DataFrame yields a DataFrame of CSS strings with the same index and
    columns, marking the global minimum.
    '''
    attr = 'font-weight: bold'
    # remove % and cast to float
    data = data.replace('%', '', regex=True).astype(float)
    if data.ndim == 1:  # Series from .apply(axis=0) or axis=1
        is_min = data == data.min()
        return [attr if v else '' for v in is_min]
    # from .apply(axis=None)
    is_min = data == data.min().min()
    return pd.DataFrame(np.where(is_min, attr, ''),
                        index=data.index,
                        columns=data.columns)
def normalize_dataset(data, normalizer, column_wise=False):
    """Normalize ``data`` with the scaler selected by ``normalizer``.

    Parameters
    ----------
    data :
        Array exposing ``min``/``max``/``mean``/``std``.
    normalizer : str
        One of ``'max01'``, ``'max11'``, ``'std'``, ``'None'`` or ``'cmax'``.
    column_wise : bool
        For ``'max01'``, ``'max11'`` and ``'std'``: compute the statistics
        along axis 0 (keeping dims) instead of over the whole array.

    Returns
    -------
    tuple
        ``(transformed_data, scaler)``.

    Raises
    ------
    ValueError
        If ``normalizer`` is not one of the supported names.
    """
    if normalizer in ('max01', 'max11'):
        if column_wise:
            minimum = data.min(axis=0, keepdims=True)
            maximum = data.max(axis=0, keepdims=True)
        else:
            minimum = data.min()
            maximum = data.max()
        if normalizer == 'max01':
            scaler = MinMax01Scaler(minimum, maximum)
            message = 'Normalize the dataset by MinMax01 Normalization'
        else:
            scaler = MinMax11Scaler(minimum, maximum)
            message = 'Normalize the dataset by MinMax11 Normalization'
    elif normalizer == 'std':
        if column_wise:
            mean = data.mean(axis=0, keepdims=True)
            std = data.std(axis=0, keepdims=True)
        else:
            mean = data.mean()
            std = data.std()
        scaler = StandardScaler(mean, std)
        message = 'Normalize the dataset by Standard Normalization'
    elif normalizer == 'None':
        scaler = NScaler()
        message = 'Does not normalize the dataset'
    elif normalizer == 'cmax':
        # column min max, to be deprecated
        # note: axis must be the spatial dimension, please check !
        scaler = ColumnMinMaxScaler(data.min(axis=0), data.max(axis=0))
        message = 'Normalize the dataset by Column Min-Max Normalization'
    else:
        raise ValueError('Unknown normalizer: {!r}'.format(normalizer))

    data = scaler.transform(data)
    print(message)
    return data, scaler
def load_data(data_source, seqlen, date_op, date_ed, normalize=True):
    """Build sliding windows of per-edge speed readings from daily JSON files.

    Files under ``datadir + data_source + '/'`` whose name stem lies between
    ``date_op`` and ``date_ed`` (string comparison) are read into an
    ``(edge_num, total_intervals)`` matrix. Cells never written stay 0 and
    are treated as unobserved. The matrix is optionally scaled, cut into
    windows of length ``seqlen`` and returned as a float tensor permuted to
    ``(window, seqlen, edge)``. Several fill-rate diagnostics are printed.
    """
    # data shape: batch * seqlen * feature
    with open(mapped_edge_filepath, 'r') as handle:
        mapped_edges = json.load(handle)

    n_days = int(date_ed) - int(date_op) + 1
    totle_time_interval_num = day_time_interval_num * n_days
    data = np.zeros((edge_num, totle_time_interval_num))

    source_dir = datadir + data_source + '/'
    for file_name in os.listdir(source_dir):
        stem = file_name.split('.')[0]
        if file_name.startswith('.') or stem > date_ed or stem < date_op:
            continue
        # Day offset comes from the last two characters of the stem and of
        # date_op.
        column_base = (int(stem[-2:]) - int(date_op[-2:])) * day_time_interval_num
        with open(source_dir + file_name, 'r') as handle:
            speed = json.load(handle)
        for edge in speed:
            # current edge not in the selected region
            if edge not in mapped_edges:
                continue
            readings = speed[edge]
            for time_interval in readings:
                row = mapped_edges[edge]
                data[row, column_base + int(time_interval)] = readings[time_interval]

    unobserved = np.where(data == 0)
    if normalize:
        data = (data - data.min()) / data.max()
    # Unobserved cells stay exactly zero after scaling.
    data[unobserved] = 0

    windows = [data[:, start:start + seqlen]
               for start in range(totle_time_interval_num - seqlen)]
    output = torch.FloatTensor(windows).permute(0, 2, 1)

    as_array = output.numpy()
    filled_rate = np.count_nonzero(as_array != 0) / (
        output.shape[0] * output.shape[1] * output.shape[2])
    # Fill rate of fixed window ranges.
    for begin, end in ((0, 72), (72, 120), (120, 204), (204, 240), (240, 288)):
        segment = output.numpy()[begin:end, :, :]
        print(np.count_nonzero(segment != 0) / output[begin:end, :, :].numel())
    print(data_source, 'data shape:', output.shape, 'filled rate:', filled_rate)
    return output
def train(epoch):
    """Run one training epoch over ``train_loader`` on the GPU.

    Each batch's ``'frame'`` entry gets a new axis at position 1, is cast to
    float, moved to CUDA and min-max scaled per batch. The loss is the sum of
    the KLD and NLL terms returned by ``model``. Progress is printed every
    ``print_every`` batches, and the average loss once the epoch ends.
    """
    train_loss = 0
    for batch_idx, data in enumerate(train_loader):
        # transforming data
        data = torch.unsqueeze(data['frame'], 1).float().cuda()
        # A constant batch maps to zeros instead of dividing by zero.
        lowest = data.min().item()
        span = data.max().item() - lowest
        if span == 0:
            span = 1
        data = (data - lowest) / span

        # forward + backward + optimize
        optimizer.zero_grad()
        kld_loss, nll_loss, _, _ = model(data)
        loss = kld_loss + nll_loss
        loss.backward()
        # Clip before the step: clipping afterwards cannot affect the update
        # because the gradients are zeroed at the start of the next batch.
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        # printing
        if batch_idx % print_every == 0:
            print(
                'Train Epoch: {} [{}/{} ({:.0f}%)]\t KLD Loss: {:.6f} \t NLL Loss: {:.6f}'
                .format(epoch, batch_idx * len(data),
                        len(train_loader.dataset),
                        100. * batch_idx / len(train_loader),
                        kld_loss.item() / batch_size,
                        nll_loss.item() / batch_size))

        train_loss += loss.item()

    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(train_loader.dataset)))
    return
def test(epoch):
    """uses test data to evaluate likelihood of the model

    Accumulates the KLD and NLL terms returned by ``model`` over
    ``test_loader``, divides both by the dataset size and prints them.
    ``epoch`` is accepted but not used.
    """
    mean_kld_loss, mean_nll_loss = 0, 0
    # No gradients are needed for evaluation; skip building the graph.
    with torch.no_grad():
        for data, _ in test_loader:
            data = data.squeeze().transpose(0, 1)
            # A constant batch maps to zeros instead of dividing by zero.
            lowest = data.min().item()
            span = data.max().item() - lowest
            if span == 0:
                span = 1
            data = (data - lowest) / span

            kld_loss, nll_loss, _, _ = model(data)
            mean_kld_loss += kld_loss.item()
            mean_nll_loss += nll_loss.item()

    mean_kld_loss /= len(test_loader.dataset)
    mean_nll_loss /= len(test_loader.dataset)

    print('====> Test set loss: KLD Loss = {:.4f}, NLL Loss = {:.4f} '.format(
        mean_kld_loss, mean_nll_loss))
def train(epoch):
    """Run one training epoch over ``train_loader`` and preview a sample.

    Each batch is squeezed, has its first two axes swapped and is min-max
    scaled per batch. The loss is the sum of the KLD and NLL terms returned
    by ``model``. Every ``print_every`` batches the losses are printed and a
    sample from ``model.sample(28)`` is shown; the average loss is printed
    once the epoch ends.
    """
    train_loss = 0
    for batch_idx, (data, _) in enumerate(train_loader):
        # transforming data
        data = data.squeeze().transpose(0, 1)
        # A constant batch maps to zeros instead of dividing by zero.
        lowest = data.min().item()
        span = data.max().item() - lowest
        if span == 0:
            span = 1
        data = (data - lowest) / span

        # forward + backward + optimize
        optimizer.zero_grad()
        kld_loss, nll_loss, _, _ = model(data)
        loss = kld_loss + nll_loss
        loss.backward()
        # Clip before the step: clipping afterwards cannot affect the update.
        # The underscore-suffixed function replaces the deprecated
        # ``clip_grad_norm``.
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        # printing
        if batch_idx % print_every == 0:
            print(
                'Train Epoch: {} [{}/{} ({:.0f}%)]\t KLD Loss: {:.6f} \t NLL Loss: {:.6f}'
                .format(epoch, batch_idx * len(data),
                        len(train_loader.dataset),
                        100. * batch_idx / len(train_loader),
                        kld_loss.item() / batch_size,
                        nll_loss.item() / batch_size))

            sample = model.sample(28)
            plt.imshow(sample.numpy())
            plt.pause(1e-6)

        train_loss += loss.item()

    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(train_loader.dataset)))
def train(epoch):
    """Run one training epoch over ``train_loader`` with a tqdm progress bar.

    Each batch is squeezed, has its first two axes swapped and is min-max
    scaled per batch. The loss is the sum of the KLD and NLL terms returned
    by ``model``; both terms divided by ``batch_size`` are shown on the
    progress bar. ``epoch`` is accepted but not used. Returns ``None``.
    """
    train_loss = 0
    tq = tqdm(train_loader)
    for batch_idx, (data, _) in enumerate(tq):
        data = data.squeeze().transpose(0, 1)
        # A constant batch maps to zeros instead of dividing by zero.
        lowest = data.min().item()
        span = data.max().item() - lowest
        if span == 0:
            span = 1
        data = (data - lowest) / span

        # forward + backward + optimize
        optimizer.zero_grad()
        kld_loss, nll_loss, _, _ = model(data)
        loss = kld_loss + nll_loss
        loss.backward()
        # Clip before the step: clipping afterwards cannot affect the update.
        # The underscore-suffixed function replaces the deprecated
        # ``clip_grad_norm``.
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        tq.set_postfix(kld_loss=(kld_loss.item() / batch_size),
                       nll_loss=(nll_loss.item() / batch_size))

        train_loss += loss.item()
    return
def _rescale_to_signed_unit(data):
    """Scale a float array in place to roughly ``[-1, 1]`` and return it."""
    lowest = data.min()
    span = data.max() - lowest
    # normalize it to 0 to 1: shift by the minimum first, otherwise the
    # result only lands in [0, 1] when the minimum happens to be 0.
    data -= lowest
    data /= (span + 0.0001)  # the epsilon avoids division by zero on flat input
    # normalize to -1 to 1
    data *= 2
    data -= 1
    return data


def pil_and_hdf5_loader(path):
    """Load ``path`` as a float32 array scaled to roughly ``[-1, 1]``.

    Paths ending in ``.hdf5`` are read from the ``HDF5_DATASET_NAME`` dataset;
    anything else is opened with PIL and converted to RGB.

    NOTE(review): the scaling now subtracts the minimum before dividing, as
    the original "normalize it to 0 to 1" comment intended. Outputs differ
    from the previous version whenever the input minimum is non-zero.
    """
    # open path as file to avoid ResourceWarning
    # (https://github.com/python-pillow/Pillow/issues/835)
    if path.endswith('.hdf5'):
        with h5py.File(path, 'r') as f:
            # note: DO NOT USE np.array(f[hdf5_dataset_name]); it was much
            # slower in testing
            data = f[HDF5_DATASET_NAME][:].astype(np.float32)
        # torchvision's ToTensor only rescales in certain conditions, so the
        # scaling is done here in the loader itself.
        # link: https://pytorch.org/docs/stable/torchvision/transforms.html
        return _rescale_to_signed_unit(data)

    # for other types
    with open(path, 'rb') as f:
        img = Image.open(f)
        # ToTensor normalizes to 0..1 only for uint8 arrays, so return a
        # float32 image and scale it here instead.
        data = np.array(img.convert('RGB')).astype(np.float32)
    return _rescale_to_signed_unit(data)
def vis_square(data, name_fig):
    '''Take an array of shape (n, height, width) or (n, height, width, 3)
    and visualize each (height, width) thing in a grid of size approx.
    sqrt(n) by sqrt(n).

    The grid is shown with ``plt.imshow``/``plt.show``, saved to ``name_fig``
    with ``plt.imsave`` and returned.
    '''
    # normalize data for display; constant data maps to zeros instead of NaN
    lowest = data.min()
    span = data.max() - lowest
    if span == 0:
        span = 1
    data = (data - lowest) / span

    # force the number of filters to be square
    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = (((0, n ** 2 - data.shape[0]),
                (0, 1), (0, 1))                 # add some space between filters
               + ((0, 0),) * (data.ndim - 3))   # don't pad the last dimension (if there is one)
    data = np.pad(data, padding, mode='constant', constant_values=1)  # pad with ones (white)

    # tile the filters into an image
    tiled_axes = (0, 2, 1, 3) + tuple(range(4, data.ndim + 1))
    data = data.reshape((n, n) + data.shape[1:]).transpose(tiled_axes)
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])

    plt.imshow(data)
    plt.show()
    plt.imsave(name_fig, data)
    return data
def min_max_scale(data, center=False):
    """Min-max scale ``data`` along axis 0, optionally mean-centering after.

    Parameters
    ----------
    data : numpy.ndarray
        Values to scale.
    center : bool
        If true, subtract the axis-0 mean of the scaled data.

    Returns
    -------
    tuple
        ``(scaled_data, min_, max_, mean_)``; ``mean_`` is ``None`` unless
        ``center`` is true. Positions whose range is (close to) zero are set
        to 0 in ``scaled_data``.
    """
    min_ = data.min(axis=0)
    max_ = data.max(axis=0)
    span = max_ - min_
    valid = ~np.isclose(span, 0)
    # ``np.divide(..., where=...)`` without ``out=`` leaves the masked-out
    # entries uninitialised, so divide by a safe span and zero them instead.
    safe_span = np.where(valid, span, 1)
    scaled_data = np.where(valid, (data - min_) / safe_span, 0)
    if center:
        mean_ = scaled_data.mean(axis=0)
        scaled_data = scaled_data - mean_
    else:
        mean_ = None
    return scaled_data, min_, max_, mean_
def train(epoch):
    """Run one training epoch over ``train_loader`` on ``device``.

    Each batch is moved to ``device``, squeezed, has its first two axes
    swapped and is min-max scaled per batch. The loss is the sum of the KLD
    and NLL terms returned by ``model``. Every ``print_every`` batches the
    losses are printed and a sample from ``model.sample`` is shown; the
    average loss is printed once the epoch ends.
    """
    train_loss = 0
    for batch_idx, (data, _) in enumerate(train_loader):
        # transforming data
        data = data.to(device)
        data = data.squeeze().transpose(0, 1)  # (seq, batch, elem)
        # A constant batch maps to zeros instead of dividing by zero.
        lowest = data.min()
        span = data.max() - lowest
        if span == 0:
            span = 1
        data = (data - lowest) / span

        # forward + backward + optimize
        optimizer.zero_grad()
        kld_loss, nll_loss, _, _ = model(data)
        loss = kld_loss + nll_loss
        loss.backward()
        # Clip before the step: clipping afterwards cannot affect the update
        # because the gradients are zeroed at the start of the next batch.
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        # printing
        if batch_idx % print_every == 0:
            print(
                'Train Epoch: {} [{}/{} ({:.0f}%)]\t KLD Loss: {:.6f} \t NLL Loss: {:.6f}'
                .format(epoch, batch_idx * batch_size,
                        batch_size * (len(train_loader.dataset) // batch_size),
                        100. * batch_idx / len(train_loader),
                        kld_loss.item() / batch_size,
                        nll_loss.item() / batch_size))

            sample = model.sample(torch.tensor(28, device=device))
            plt.imshow(sample.to(torch.device('cpu')).numpy())
            plt.pause(1e-6)

        train_loss += loss.item()

    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(train_loader.dataset)))
def bytescale(data, cmin=None, cmax=None, high=255, low=0):
    """
    Convert an array to ``uint8``, mapping ``[cmin, cmax]`` onto ``[low, high]``.

    Input that is already ``uint8`` is returned unchanged.

    Parameters
    ----------
    data : ndarray
        Image data array.
    cmin : scalar, optional
        Value mapped to `low`. Default is ``data.min()``.
    cmax : scalar, optional
        Value mapped to `high`. Default is ``data.max()``.
    high : scalar, optional
        Upper bound of the output range.  Default is 255.
    low : scalar, optional
        Lower bound of the output range.  Default is 0.

    Returns
    -------
    img_array : uint8 ndarray
        The byte-scaled array.

    Raises
    ------
    ValueError
        If ``high > 255``, ``low < 0``, ``high < low`` or ``cmax < cmin``.
    """
    if data.dtype == np.uint8:
        return data

    range_checks = (
        (high > 255, "`high` should be less than or equal to 255."),
        (low < 0, "`low` should be greater than or equal to 0."),
        (high < low, "`high` should be greater than or equal to `low`."),
    )
    for violated, message in range_checks:
        if violated:
            raise ValueError(message)

    floor = data.min() if cmin is None else cmin
    ceiling = data.max() if cmax is None else cmax

    span = ceiling - floor
    if span < 0:
        raise ValueError("`cmax` should be larger than `cmin`.")
    if span == 0:
        # Flat input: use a unit span so the division below is defined.
        span = 1

    factor = float(high - low) / span
    shifted = (data - floor) * factor + low
    # +0.5 followed by the truncating cast rounds to the nearest integer.
    return (shifted.clip(low, high) + 0.5).astype(np.uint8)
def hdf5_loader(path):
    """Load an ``.hdf5`` file as a float32 array scaled to roughly ``[-1, 1]``.

    The data is read from the ``HDF5_DATASET_NAME`` dataset. Paths that do
    not end in ``.hdf5`` are not handled and yield ``None``.

    NOTE(review): the scaling now subtracts the minimum before dividing, as
    the original "normalize it to 0 to 1" comment intended. Outputs differ
    from the previous version whenever the input minimum is non-zero.
    """
    if not path.endswith('.hdf5'):
        return None

    with h5py.File(path, 'r') as f:
        data = f[HDF5_DATASET_NAME][:].astype(np.float32)

    lowest = data.min()
    span = data.max() - lowest
    # normalize it to 0 to 1 (the epsilon avoids division by zero on flat input)
    data -= lowest
    data /= (span + 0.0001)
    # normalize to -1 to 1
    data *= 2
    data -= 1
    # torchvision's ToTensor only rescales in certain conditions, so the
    # scaling is done here in the loader itself.
    # link: https://pytorch.org/docs/stable/torchvision/transforms.html
    return data
def visualize_gradients(viz, data, caption='', zoom=4):
    """Build a zoomed image grid from a batch of gradients.

    ``data`` is moved to the CPU, min-max scaled when its range is positive,
    arranged with ``utils.make_grid`` using ``int(sqrt(batch))`` images per
    row, converted to a PIL image and enlarged ``zoom`` times with
    nearest-neighbour resampling.

    NOTE(review): ``viz`` and ``caption`` are unused and the resulting tensor
    is neither returned nor displayed in the visible code -- confirm whether
    a display call was dropped.
    """
    batchSize = data.size(0)
    rows = int(math.sqrt(batchSize))
    toPIL = transforms.ToPILImage()

    # normalize it
    data = data.cpu()
    dmin = data.min()
    dmax = data.max()
    width = dmax - dmin
    if width > 0.0:
        data = data.add(-dmin).div(width)

    data_imgs = utils.make_grid(data, nrow=rows)
    pimg = toPIL(data_imgs)
    # PIL's resize takes (width, height); the previous (height, width) order
    # transposed the aspect ratio of non-square grids.
    pimg = pimg.resize((pimg.width * zoom, pimg.height * zoom), Image.NEAREST)

    imgarray = np.array(pimg)
    new_image = torch.from_numpy(imgarray)
    assert (new_image.dim() == 3)
def visualize_batch(viz, data, caption='', normalize=True, gammacorrect=False, window=None):
    """Prepare a batch for display and arrange it with ``utils.make_grid``.

    With ``gammacorrect`` the data is raised to the power ``1 / 2.2``. Then
    it is either clamped to ``[0, 1]`` (``normalize == False``) or min-max
    scaled when its range is positive.

    NOTE(review): ``viz``, ``caption`` and ``window`` are unused and the grid
    is not returned in the visible code.
    """
    if gammacorrect:
        data = data.pow(1.0 / 2.20)

    if normalize == False:  # noqa: E712 -- equality test kept on purpose
        data = data.clamp(0, 1)
    else:
        lowest = data.min()
        highest = data.max()
        span = highest - lowest
        if span > 0.0:
            data = (data - lowest) / span

    data_imgs = utils.make_grid(data)
def visualize_experiment_dataset(is_spiral=False, modes=1, param_modes=None):
    """Plot the initial training distribution and save it as a PDF.

    Parameters
    ----------
    is_spiral : bool
        If true, scatter the first two columns of the data; otherwise draw a
        filled density curve from ``kde`` on a 1000-point grid.
    modes : int
        Forwarded to ``experiment_data`` as ``n_modes``.
    param_modes : list, optional
        Forwarded to ``experiment_data`` as ``params``. Defaults to
        ``[(0, 1)]``, created per call so the default is never shared.
    """
    if param_modes is None:
        param_modes = [(0, 1)]
    data = experiment_data(is_spiral=is_spiral,
                           n_modes=modes,
                           params=param_modes)
    plt.figure(figsize=(20, 20))
    plt.rc("text", usetex=True)
    plt.title("Initial Distribution", fontsize=45)
    if is_spiral:
        plt.scatter(data[:, 0], data[:, 1], label='train spiral data')
    else:
        data_grid = np.linspace(data.min(), data.max(), 1000)
        bandwidth = 0.1
        data_p_n = kde(data, data_grid, bandwidth=bandwidth)
        plt.fill_between(x=data_grid, y1=data_p_n, y2=0, alpha=0.7)
    plt.xticks(fontsize=40)
    plt.yticks(fontsize=40)
    plt.grid()
    savefig("results/intial_distr.pdf")
    plt.show()
def run_fid(data, sample):
    """Compute the Frechet distance between real and generated activations.

    Both inputs must lie in ``[0, 1]`` (checked with ``assert``). Each is
    rescaled to ``[-1, 1]``, repeated to three channels when axis 1 has size
    one, detached and passed through ``inception_score`` to get activations.
    """
    assert data.max() <=1 and data.min() >= 0
    assert sample.max() <=1 and sample.min() >= 0

    def _as_inception_input(images):
        # [0, 1] -> [-1, 1], grayscale -> 3 channels, cut from the graph.
        images = 2 * images - 1
        if images.shape[1] == 1:
            images = images.repeat(1, 3, 1, 1)
        return images.detach()

    def _activations(images):
        with torch.no_grad():
            _, _, _, acts = inception_score(images,
                                            cuda=True,
                                            batch_size=32,
                                            resize=True,
                                            splits=10,
                                            return_preds=True)
        return acts

    acts_real = _activations(_as_inception_input(data))
    acts_fake = _activations(_as_inception_input(sample))

    mu_real, sigma_real = calculate_activation_statistics(acts_real)
    mu_fake, sigma_fake = calculate_activation_statistics(acts_fake)
    return calculate_frechet_distance(mu_real, sigma_real, mu_fake, sigma_fake)
def get_dataloader(dataset, batch_size=128, window=12, horizon=1, val_days=10, test_days=10, normalizer='max'):
    """Load a dataset, normalize it and build train/val/test loaders.

    Parameters
    ----------
    dataset : str
        Dataset name; only ``'SYDNEY'`` is supported.
    batch_size, window, horizon, val_days, test_days :
        Forwarded to ``data_loader``, ``Add_Window_Horizon`` and
        ``split_train_val_test`` respectively.
    normalizer : str
        ``'max'`` for min-max scaling, ``'std'`` for standard scaling;
        anything else leaves the data unscaled and returns ``scaler=None``.

    Returns
    -------
    tuple
        ``(train_dataloader, val_dataloader, test_dataloader, scaler)``.

    Raises
    ------
    ValueError
        If ``dataset`` is not supported (previously this surfaced as an
        ``UnboundLocalError`` on ``data``).
    """
    if dataset != 'SYDNEY':
        raise ValueError('Unsupported dataset: {!r}'.format(dataset))

    data = Load_Sydney_Demand_Data(os.path.join(base_dir, '1h_data_new3.csv'))
    print(data.shape)
    print('Load Sydney Dataset Successfully!')

    if normalizer == 'max':
        scaler = MinMaxScaler(data.min(), data.max())
        data = scaler.transform(data)
        print('Normalize the dataset by MinMax Normalization')
    elif normalizer == 'std':
        scaler = StandardScaler(data.mean(), data.std())
        data = scaler.transform(data)
        print('Normalize the dataset by Standard Normalization')
    else:
        scaler = None

    X, Y = Add_Window_Horizon(data, window, horizon)
    print(X.shape, Y.shape)

    x_tra, x_val, x_test = split_train_val_test(X, val_days, test_days)
    y_tra, y_val, y_test = split_train_val_test(Y, val_days, test_days)

    print(x_tra.shape, y_tra.shape)
    print(x_val.shape, y_val.shape)
    print(x_test.shape, y_test.shape)

    train_dataloader = data_loader(x_tra, y_tra, batch_size, 'train')
    val_dataloader = data_loader(x_val, y_val, batch_size, 'val')
    test_dataloader = data_loader(x_test, y_test, batch_size, 'test')
    # NOTE(review): this full-dataset loader is built but never used or
    # returned; the call is kept in case data_loader has side effects.
    dataloader = data_loader(X, Y, batch_size, 'all')
    return train_dataloader, val_dataloader, test_dataloader, scaler
batch_size=1, shuffle=True, **kwargs) val_loader = torch.utils.data.DataLoader( VOC2011ClassSeg( root, split='seg11valid', transform=True), batch_size=1, shuffle=False, **kwargs) # In[11]: #get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib.pyplot as plt for data, target in train_loader: break print(data.shape) print(target.shape) data.min() data_show, label_show = train_loader.dataset.untransform(data[0].cpu().clone(), target[0].cpu().clone()) plt.imshow(data_show) plt.savefig('imagesProduced/data_show') #plt.show() def imshow_label(label_show): import matplotlib cmap = plt.cm.jet # extract all colors from the .jet map cmaplist = [cmap(i) for i in range(cmap.N)] cmaplist[0] = (0.0,0.0,0.0,1.0) cmap = cmap.from_list('Custom cmap', cmaplist, cmap.N) # define the bins and normalize bounds = np.arange(0,len(train_loader.dataset.class_names))
def read(self, pattern: str, uri: str):
    """Read one grayscale band image and scale it into ``(0, 1]``.

    Parameters
    ----------
    pattern : str
        Format string with ``band`` and ``uri`` fields giving the file path.
    uri : str
        Value substituted for ``uri``; ``self.value`` fills ``band``.

    Returns
    -------
    numpy.ndarray
        ``(1 + data - min) / (1 + max - min)`` as floats.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file (``cv2.imread`` returned ``None``).
    """
    path = pattern.format(band=self.value, uri=uri)
    data = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if data is None:
        raise FileNotFoundError("could not read image: {}".format(path))
    # Convert before the arithmetic: in uint8, ``1 + 255`` wraps to 0.
    data = data.astype(float)
    lowest = data.min()
    return (1 + data - lowest) / (1 + data.max() - lowest)
def normalize(data):
    """Min-max scale ``data`` to integers in ``[0, 225]``.

    Constant input maps to zeros instead of dividing by zero.

    NOTE(review): the upper bound is 225 as written; if 255 (the 8-bit
    maximum) was intended, confirm with the callers before changing it.
    """
    lowest = data.min()
    span = data.max() - lowest
    if span == 0:
        span = 1
    return (225 * (data - lowest) / span).astype(int)