def test_train(self):
    """Check that train() fills the transition matrix from pre-processed data."""
    X = np.arange(1000) % 10
    for i in range(1, N):
        M = markovChain.markovChain(i)
        M.train(X)
        T = M.transitions
        for state in T:
            last = ast.literal_eval(state)[-1]
            target = str(list((np.arange(last, last + i) + 1) % 10))
            self.assertEqual(T[state][target], 1.0)
    X = np.arange(1000) % 10
    np.random.shuffle(X)  # in place; np.shuffle does not exist
    for i in range(1, N):
        M = markovChain.markovChain(i)
        M.train(X)
        P = M.getPrediction()
        for state in M.stateAlphabet:
            # each row of the prediction matrix must be a probability distribution
            self.assertEqual(np.sum(P[state]), 1.0)
def next_batch_gen(self, batch_size, shuffle=True):
    """
    A Python generator that yields batches of data indefinitely.

    :param batch_size: The number of samples to return in each batch.
    :param shuffle: If True, reshuffle the dataset after every sample has
        been returned once. If False, the order of data samples stays the same.
    :return: A batch of data with shape (batch_size, width, height, channels).
    """
    N = self.N
    y = self.y
    X = self.x
    tot_batches = N // batch_size
    batch_count = 0
    while True:
        if batch_count < tot_batches:
            # compute the slice before incrementing, so batch 0 is not skipped
            batch_start = batch_count * batch_size
            batch_end = batch_start + batch_size
            batch_count += 1
            yield X[batch_start:batch_end], y[batch_start:batch_end]
        else:
            if shuffle:
                # shuffle X and y with one permutation so pairs stay aligned
                perm = np.random.permutation(N)
                X, y = X[perm], y[perm]
            batch_count = 0
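# A minimal usage sketch (assumes a dataset object `ds` exposing .x, .y and
# .N as the method above expects; the name `ds` is illustrative, not from
# the original code):
# gen = ds.next_batch_gen(batch_size=32)
# for _ in range(10):          # the generator itself never stops
#     x_batch, y_batch = next(gen)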
def lattice(treatments, r, randomize=None, seed=None):
    """ Generate a Lattice Design

    Args:
        treatments: The treatments subjects are to be randomized to. The
            number of treatments must be a perfect square.
        r: The number of replicates; 2 gives a simple lattice, 3 a triple
            lattice.
        randomize: A boolean indicating if the order of treatments should be
            randomized. If this is for an actual trial, this should be True.
        seed: The seed used by numpy.random.seed().

    Returns:
        ndarray: The design matrix whose columns are the block number, the
            replicate number and the treatment.
    """
    n_trt = len(treatments)
    k = int(round(math.sqrt(n_trt)))
    if r not in [2, 3]:
        raise ValueError('r must be 2 (simple lattice) or 3 (triple lattice)')
    if k ** 2 != n_trt:
        raise ValueError('The number of treatments must be a perfect square')
    if seed is not None:
        np.random.seed(seed)
    square_values = list(range(n_trt))
    if randomize:
        np.random.shuffle(square_values)
    # build rows independently; [[0] * k] * k would alias one row k times
    square_1 = np.array([[square_values[k * row + col] for col in range(k)]
                         for row in range(k)])
    square_2 = np.transpose(square_1)
    latin_square_trt = latin_square(
        k, seed=seed * 7218 if seed is not None else None, unroll=True)[:, 2]
    order = [
        i[0] for i in sorted(enumerate(latin_square_trt), key=lambda x: x[1])
    ]
    square_3 = np.zeros((k, k), dtype=int)
    for idx, o in enumerate(order):
        r_3, c_3 = idx // k, idx % k
        r_1, c_1 = o // k, o % k
        square_3[r_3, c_3] = square_1[r_1, c_1]
    square_3 = np.transpose(square_3)
    if randomize:
        np.random.shuffle(square_1)
        np.random.shuffle(square_2)
        np.random.shuffle(square_3)
    # use only as many squares as there are replicates
    squares = [square_1, square_2, square_3][:r]
    rep = [i // n_trt + 1 for i in range(r * n_trt)]
    block = [i // k + 1 for i in range(r * n_trt)]
    trt = []
    for square in squares:
        for col in range(k):
            trt.extend(square[:, col])
    design_matrix = np.array([block, rep, trt]).T
    return design_matrix
def _split_into_groups(iterable, ngroups=-1, fractions=None, shuffle=True):
    if shuffle:
        # copy first so the caller's array is not reordered in place
        iterable = np.copy(iterable)
        np.random.shuffle(iterable)
    start_idxs, end_idxs = _group_start_end_idxs(len(iterable), ngroups,
                                                 fractions)
    return [iterable[start:end] for start, end in zip(start_idxs, end_idxs)]
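# _group_start_end_idxs is not shown above; a minimal sketch of what such a
# helper might compute (hypothetical implementation, assuming `fractions`
# sums to 1 and takes precedence over `ngroups` when given):
import numpy as np

def _group_start_end_idxs_sketch(n, ngroups=-1, fractions=None):
    if fractions is None:
        fractions = [1.0 / ngroups] * ngroups
    ends = np.round(np.cumsum(fractions) * n).astype(int)
    ends[-1] = n  # absorb any rounding error in the last group
    starts = np.concatenate(([0], ends[:-1]))
    return starts, ends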
def __init__(self, *args, **kwargs):
    self.dataset = SUN_Dataset(*args, **kwargs)
    self.total_size = len(self.dataset)
    indices = list(range(self.total_size))
    np.random.shuffle(indices)
    # 80/20 split between training and validation
    split = self.total_size * 4 // 5
    self.train_indices = indices[:split]
    self.val_indices = indices[split:]
    self.training = self.interface(self.dataset, self.train_indices)
    self.validation = self.interface(self.dataset, self.val_indices)
def __next__(self):
    if self._train_location >= len(self._train_indices) or \
            self._test_location >= len(self._test_indices):
        # Reset: reshuffle both index lists and start a new pass
        np.random.shuffle(self._train_indices)
        np.random.shuffle(self._test_indices)
        self._train_location = 0
        self._test_location = 0
        raise StopIteration()
    return self.get_train_test_batch()
def sample(self, n=1):
    # multinomial with no size argument returns a flat (k,) array of counts;
    # with size=1 it returns shape (1, k) and counts[i] would index rows
    counts = np.random.multinomial(n, self.weights)
    samples = np.empty((n, len(self.means[0])))
    j = 0
    for i in range(len(self.means)):
        samples[j:j + counts[i]] = stats.multivariate_normal.rvs(
            mean=self.means[i], cov=self.covs[i], size=counts[i])
        j += counts[i]
    np.random.shuffle(samples)  # shuffle rows in place
    return samples
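# A minimal, self-contained sketch of the same sampling pattern for a
# two-component 2-D mixture (the weights, means, and covariances below are
# illustrative values, not from the original class):
import numpy as np
from scipy import stats

weights = [0.3, 0.7]
means = [np.zeros(2), np.full(2, 5.0)]
covs = [np.eye(2), 2.0 * np.eye(2)]

counts = np.random.multinomial(100, weights)
parts = [stats.multivariate_normal.rvs(mean=m, cov=c, size=k).reshape(k, -1)
         for m, c, k in zip(means, covs, counts) if k > 0]
draws = np.vstack(parts)
np.random.shuffle(draws)  # mix component order in place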
def load_data(tensor_path):
    data = []  # list of data chunks, one per song
    for f in os.listdir(tensor_path):
        if 'rate' in f:
            continue
        with open(os.path.join(tensor_path, f), 'rb') as p_file:
            song = pickle.load(p_file)
        data.append(song.batch(seq_len))
    np.random.shuffle(data)  # works in place on the list
    return data
def sample(self, n=1):
    # multinomial with no size argument returns a flat (k,) array of counts
    counts = np.random.multinomial(n, self.weights)
    samples = np.empty((n, len(self.means[0])))
    k = 0
    for i in range(len(self.means)):
        for j in range(len(self.means[i])):
            samples[k:k + counts[i], j] = stats.norm.rvs(
                loc=self.means[i, j], scale=self.stds[i, j], size=counts[i])
        k += counts[i]
    np.random.shuffle(samples)  # shuffle rows in place
    return samples
def label_batch(indices):
    f = h5py.File(input_file, 'r')
    labels = f['labels']
    while True:
        np.random.shuffle(indices)
        for i in indices:
            true_labels = labels[i, ..., 0]
            label = to_categorical(
                np.reshape(true_labels, true_labels.shape + (1, )))
            # autoencoder-style generator: the label is both input and target
            yield (label[np.newaxis, ...], label[np.newaxis, ...])
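# A minimal usage sketch (assumes a Keras-style autoencoder `model` and the
# global `input_file` the generator reads from; `model` and `n_samples` are
# illustrative names outside this snippet):
# gen = label_batch(np.arange(n_samples))
# model.fit(gen, steps_per_epoch=n_samples, epochs=5)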
def __iter__(self):
    train0 = np.random.choice(self.x, self.size_train0, replace=False)
    train1 = np.random.choice(
        self.y, self.size_train1,
        replace=False)  # train = same proportion as in train
    test0 = np.setdiff1d(self.x, train0)
    test1 = np.random.choice(
        np.setdiff1d(self.y, train1), self.size_test1,
        replace=False)  # test = same proportion as in test
    train = np.hstack((train0, train1))
    test = np.hstack((test0, test1))
    # np.random.shuffle works in place and returns None, so shuffle first
    np.random.shuffle(train)
    np.random.shuffle(test)
    yield train, test
def get_stim_resp(data_type='train'):
    # Permute the chunks and return the training or test stimulus/response.
    global train_chunks
    global test_chunks
    global train_counter
    global response

    def get_stimulus_batch(ichunk):
        stim_path = FLAGS.data_location + 'Stimulus/'
        stim_file = sio.loadmat(
            gfile.Open(stim_path + 'stim_chunk_' + str(ichunk) + '.mat'))
        chunk_start = np.squeeze(stim_file['chunk_start'])
        chunk_end = np.squeeze(stim_file['chunk_end'])
        stim_chunk = stim_file['stimulus_chunk']
        stim_chunk = np.transpose(stim_chunk, [3, 0, 1, 2])
        return stim_chunk, chunk_start, chunk_end

    if data_type == 'test':
        print('Loading test')
        chunk_ids = np.array(test_chunks).astype('int')
    if data_type == 'train':
        print('Loading train')
        if train_counter >= train_chunks.shape[0]:
            # shuffle in place; assigning the result would store None
            np.random.shuffle(train_chunks)
            train_counter = 0
        chunk_ids = [
            np.squeeze(np.array(train_chunks[train_counter]).astype('int'))
        ]
        train_counter += 1

    stim_total = np.zeros((0, 640, 320, 3))
    resp_total = np.zeros((0, FLAGS.n_cells))
    data_len_total = 0
    for ichunk in chunk_ids:
        print('Loading chunk: ' + str(ichunk))
        if ichunk == chunk_ids[0]:
            # first chunk: keep the full stimulus window
            stim_chunk, chunk_start, chunk_end = get_stimulus_batch(ichunk)
            resp_chunk = response[chunk_start + 29:chunk_end + 1, :]
        else:
            # later chunks: drop the part that overlaps the previous chunk
            stim_chunk, chunk_start, chunk_end = get_stimulus_batch(ichunk)
            stim_chunk = stim_chunk[30:, :, :, :]
            resp_chunk = response[chunk_start + 30 - 1:chunk_end, :]
        data_len = resp_chunk.shape[0]
        print(chunk_start, chunk_end)
        print(np.shape(stim_chunk), np.shape(resp_chunk))
        stim_total = np.append(stim_total, stim_chunk, axis=0)
        resp_total = np.append(resp_total, resp_chunk, axis=0)
        data_len_total += data_len
    return stim_total, resp_total, data_len_total
def train(self, train_x, epochs, batch_size=64):
    valid_y = np.ones((batch_size, 1))
    fake_y = np.zeros((batch_size, 1))
    gen_losses = np.ones(epochs)
    des_losses = np.ones(epochs)
    des_acc = np.ones(epochs)
    for i in tqdm(range(epochs)):
        # sample a fresh random batch of real images each step; the old
        # `idx = np.shuffle(...)` assigned None and was never used
        inds = np.random.randint(0, train_x.shape[0], batch_size)
        real = train_x[inds].squeeze(axis=1)
        prior = np.random.normal(0, 1, (batch_size, self.latent_dim))
        generated = self.generator.predict(prior)
        d_loss_real = self.descriminator.train_on_batch(real, valid_y)
        d_loss_generated = self.descriminator.train_on_batch(generated, fake_y)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_generated)
        des_losses[i] = d_loss[0]
        des_acc[i] = d_loss[1]
        g_loss = self.combined.train_on_batch(prior, valid_y)
        gen_losses[i] = g_loss
    return des_losses, gen_losses, des_acc
def _get_colors(self, f):
    '''
    Mysterious function, ask @inconvergent :)
    '''
    scale = 1. / 255.
    im = Image.open(f)
    w, h = im.size
    rgbim = im.convert('RGB')
    res = []
    for i in range(0, w):
        for j in range(0, h):
            r, g, b = rgbim.getpixel((i, j))
            res.append((r * scale, g * scale, b * scale))
    np.random.shuffle(res)  # works in place on the list
    self.colors = res
    self.n_colors = len(res)
def split_data(self, folds=None, frac=None, shuffle=False):
    """
    Splits the data into training and test sets. Give either frac or folds.

    param: folds: Number of folds
    param: frac: Fraction of the data to use as test data
    param: shuffle: If True, shuffles the design matrix
    type: folds: int
    type: frac: float
    type: shuffle: bool

    return: None
    """
    if folds is None and frac is None:
        raise ValueError(
            "No split info received; give either no. folds or fraction.")
    XY = self.XY
    z = self.z
    if shuffle:
        # shuffle XY and z with one permutation so rows stay paired;
        # np.shuffle does not exist and shuffling separately would misalign them
        perm = np.random.permutation(XY.shape[0])
        XY = XY[perm]
        z = z[perm]
    if folds is not None:
        self.XY_folds = np.array_split(XY, folds, axis=0)
        self.z_folds = np.array_split(z, folds, axis=0)
    if frac is not None:
        nTest = int(np.floor(frac * XY.shape[0]))
        self.XY_Train = XY[:-nTest]
        self.XY_Test = XY[-nTest:]
        self.z_Train = z[:-nTest]
        self.z_Test = z[-nTest:]
def train_gd(self, objfunc, num_epochs=20):
    # the non-default argument must come before the default one
    val_loss = np.zeros(num_epochs)
    train_loss = np.zeros(num_epochs)
    for i in range(num_epochs):
        np.random.shuffle(self.train_data)
        train_loss_array = self.train_obj_function(obj_function=objfunc)
        train_loss[i] = np.average(train_loss_array)
        val_loss[i] = self.get_validation_loss(obj_function=objfunc)
    # plot results
    x = np.arange(1, num_epochs + 1)
    plt.title("Nonprobabilistic Gradient Descent training")
    plt.xlabel("Epoch")
    plt.ylabel("loss")
    plt.plot(x, train_loss)
    plt.plot(x, val_loss)
    plt.show()
def S_u_hat(X):
    n = len(X)
    f1 = 1 / (n * (n - 1))
    # np.random.permutation returns a shuffled copy (np.shuffle does not exist)
    X_prime = np.random.permutation(X)
    total = 0.0
    for i in X:
        for j in X_prime:
            if i != j:
                # every value inside u_q -- is this approach feasible?
                total = total + U_q(i, j)
    return f1 * total
import numpy as np

def cross_validate(data, folds):
    num_data = data.shape[0]
    assert num_data >= folds
    fold_content = num_data // folds
    # permutation returns a shuffled copy and leaves `data` untouched
    data_shuffled = np.random.permutation(data)
    data_folds = []
    for i in range(folds):
        data_folds.append(
            data_shuffled[i * fold_content:(i + 1) * fold_content])
    assert len(data_folds) == folds
    return data_folds
def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
          batch_size=200, verbose=False):
    """
    Train this linear classifier using stochastic gradient descent.

    Inputs:
    - X: A numpy array of shape (N, D) containing training data; there are N
      training samples each of dimension D.
    - y: A numpy array of shape (N,) containing training labels; y = c means
      that X has label 0 <= c < C for C classes.
    - learning_rate: (float) learning rate for optimization.
    - reg: (float) regularization strength.
    - num_iters: (integer) number of steps to take when optimizing
    - batch_size: (integer) number of training examples to use at each step.
    - verbose: (boolean) If true, print progress during optimization.

    Outputs:
    A list containing the value of the loss function at each training
    iteration.
    """
    loss_history = []
    n_samples, n_features = X.shape
    n_classes = y.max() + 1
    self.W = 0.001 * np.random.normal(
        loc=0.0, scale=1.0, size=(n_features, n_classes))
    steps = n_samples // batch_size
    for it in range(num_iters):
        if it % steps == 0:
            # reshuffle X and y with one permutation so labels stay aligned
            perm = np.random.permutation(n_samples)
            X = X[perm]
            y = y[perm]
        indices = np.random.choice(n_samples, size=(batch_size, ),
                                   replace=True)
        X_batch = X[indices]
        y_batch = y[indices]
        loss, grad = self.loss(X_batch, y_batch, reg)
        loss_history.append(loss)
        self.W -= learning_rate * grad
        if verbose and it % 100 == 0:
            print("iteration %d / %d: loss %f" % (it, num_iters, loss))
    return loss_history
def split_data(num_test_images, num_val_images, clean_path):
    np.random.seed(107)
    trainPaths = list(paths.list_images(clean_path))
    np.random.shuffle(trainPaths)
    # buff entries look like ./dataset/clean_data/data/PNEUMONIA-00.png
    buff = [x.split("/")[-1] for x in trainPaths]
    labels = [x.split("-")[0] for x in buff]
    le = LabelEncoder()
    trainLabels = le.fit_transform(labels)
    test_split = train_test_split(trainPaths,
                                  trainLabels,
                                  test_size=num_test_images,
                                  stratify=trainLabels,
                                  random_state=777)
    # perform another stratified sampling, this time to build the
    # validation data
    val_split = train_test_split(trainPaths,
                                 trainLabels,
                                 test_size=num_val_images,
                                 stratify=trainLabels,
                                 random_state=777)
    return (test_split, val_split)
def priority(self):
    # Only compute priority if requested and an update is necessary
    if self.priority_update_freq >= 0 and self.recompute_priority:
        vec = self.__results_to_feature_vector()
        split = int(np.ceil(vec.shape[0] * (0.8 if vec.shape[0] < 10 else 1.0)))
        scales = np.zeros((50,), dtype=np.float32)
        for i in range(scales.shape[0]):
            np.random.shuffle(vec)
            features = np.copy(vec[:split, :-1])
            losses = np.reshape(np.copy(vec[:split, -1]), (-1, 1))
            est = np.random.uniform(0.1, 2.0)
            gp = GaussianProcessRegressor(kernel=RBF(length_scale=est))
            gp.fit(features, losses)
            # sklearn stores the fitted kernel on gp.kernel_, not gp.kernel
            scales[i] = 1.0 / gp.kernel_.theta[0]
        self._priority = float(scales.max() - scales.min())
    return self._priority
def get_list_rand_idxs(self, n_rand_samples, replace=False,
                       force_randomization=False):
    """
    Return idxs of random samples
    - By default do not use replacement (each sample should only be able to
      be taken once)
    - If n_rand_samples is more than the number of points, just return idxs
      to all points.
    :param n_rand_samples:
    :param replace:
    :param force_randomization:
    :return:
    """
    if n_rand_samples > self.n_points:
        list_points = np.arange(self.n_points)
        if force_randomization:
            np.random.shuffle(list_points)
        return list_points
    return np.random.choice(self.n_points, size=n_rand_samples,
                            replace=replace)
def augmentData2(x, y, augments=[], p=1.0, replace=False):
    """
    Augments data by shuffling augments and looping through data.
    Applies 1 random augment to an image with probability p.
    """
    x_old, y_old = np.copy(x), np.copy(y)
    x_aug, y_aug = [], []
    idxs = [i for i in range(0, len(x))]
    np.random.shuffle(idxs)
    idxs = idxs[0:int(p * len(x))]
    for idx in idxs:
        # pick one augment at random, as the docstring promises
        np.random.shuffle(augments)
        aug = augments[0]
        augmented = aug.augment_image(x[idx])
        if not replace:
            x_aug.append(augmented)
            y_aug.append(y[idx])
        else:
            x_old[idx] = augmented
    if not replace:
        x_old = np.concatenate((x_old, x_aug))
        y_old = np.concatenate((y_old, y_aug))
    return x_old, y_old
import numpy as np

def crossValidation(model, X, Y, K=5):
    # shuffle X and Y with one permutation so labels stay aligned;
    # np.shuffle(X, Y) does not exist
    perm = np.random.permutation(len(X))
    X, Y = X[perm], Y[perm]
    sz = len(X) // K
    errors = []
    for k in range(K):
        xtrain = np.concatenate((X[:k * sz, :], X[(k + 1) * sz:, :]))
        ytrain = np.concatenate((Y[:k * sz], Y[(k + 1) * sz:]))
        xtest = X[k * sz:(k + 1) * sz, :]
        ytest = Y[k * sz:(k + 1) * sz]
        model.fit(xtrain, ytrain)
        error = model.score(xtest, ytest)
        errors.append(error)
    print('errors:', errors)
    return np.mean(errors)
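# Equivalently, scikit-learn ships a utility that shuffles any number of
# arrays with one consistent permutation (assuming scikit-learn is
# available, which the model.fit/model.score interface above suggests):
# from sklearn.utils import shuffle
# X, Y = shuffle(X, Y)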
def Train(net, train_pathes_dir, batch_size, epochs):
    print('Start the training')
    for epoch in range(epochs):
        loss_tr = np.zeros((1, 2))
        loss_ts = np.zeros((1, 2))
        # Loading the data set
        trn_data_dirs = glob.glob(train_pathes_dir + '/*.npy')
        # Randomly shuffle the data (in place; np.shuffle does not exist)
        np.random.shuffle(trn_data_dirs)
        # Computing the number of batches
        batch_idxs = len(trn_data_dirs) // batch_size
        for idx in range(0, batch_idxs):
            batch_files = trn_data_dirs[idx * batch_size:(idx + 1) *
                                        batch_size]
            batch = [load_data(batch_file) for batch_file in batch_files]
            batch_images = np.array(batch).astype(np.float32)
            # slice the stacked array, not the Python list
            x_train_b = batch_images[:, :, :, 0:3]
            y_train = batch_images[:, :, :, 3]
            # Performing hot encoding (to_categorical assumed from keras.utils)
            y_train_hot_encoding = to_categorical(y_train)
            loss_tr = loss_tr + net.train_on_batch(x_train_b,
                                                   y_train_hot_encoding)
        loss_tr = loss_tr / batch_idxs
def plot_images(images):
    '''
    images (np.array) is the array containing the anime faces.
    This function plots a 10*10 grid of subplots where each subplot is an
    anime face.
    '''
    images = images * 0.5 + 0.5
    if len(images) > 100:
        idx = np.arange(0, len(images))
        np.random.shuffle(idx)
        idx_pick = idx[:100]
        images_ = images[idx_pick]
    else:
        images_ = images
    num_grid = math.ceil(math.sqrt(len(images_)))
    fig, axes = plt.subplots(num_grid, num_grid, figsize=(10, 10))
    for i, ax in enumerate(axes.flat):
        if i < len(images_):
            ax.imshow(images_[i, :, :, :])
        ax.axis('off')
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()
def Reset(self):
    if not self.InOrder:
        if self.Label is not None:
            self.Data, self.Label = shuffle_union(self.Data, self.Label)
        else:
            # shuffle in place; assigning the result would store None
            np.random.shuffle(self.Data)
    self.DataNum = self.Data.shape[0]
    self.TestNum = int(self.DataNum * self.TestProp)
    self.TrainNum = self.DataNum - self.TestNum
    self.TestData = self.Data[self.TrainNum:]
    self.TrainData = self.Data[:self.TrainNum]
    if self.Label is not None:
        self.TestLabel = self.Label[self.TrainNum:]
        self.TrainLabel = self.Label[:self.TrainNum]
    self.TestPivot = 0
    self.TrainPivot = 0
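# shuffle_union is an external helper not shown here; a minimal sketch of
# what it presumably does (hypothetical implementation: one permutation
# applied to both arrays so data and labels stay aligned):
import numpy as np

def shuffle_union_sketch(data, label):
    perm = np.random.permutation(len(data))
    return data[perm], label[perm]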
def ICP_train_full(text_t2a, audio_t2a, audio_a2t, text_a2t):
    '''
    Args:
        text_t2a: the source for t2a, the text embedding
        audio_t2a: the target for t2a, the paired audio embedding
        audio_a2t: the source for a2t, the audio embedding
        text_a2t: the target for a2t, the paired text embedding
    Return:
        a2t_mat: the a2t transform matrix
        t2a_mat: the t2a transform matrix
    '''
    # permutation returns a shuffled copy; np.shuffle does not exist, and
    # assigning its (None) result would break the indexing below
    order_t2a = np.random.permutation(text_t2a.shape[0])
    order_a2t = np.random.permutation(audio_a2t.shape[0])
    text_t2a_copy = text_t2a[order_t2a]
    audio_t2a_copy = audio_t2a[order_t2a]
    audio_a2t_copy = audio_a2t[order_a2t]
    text_a2t_copy = text_a2t[order_a2t]
    train_core = ICP.audio2text_ICP(audio_t2a.shape[1], text_a2t.shape[1],
                                    FLAG.mb, FLAG.penalty_lambda)
    dim = audio_t2a.shape[1]
    a2t_mat = np.identity(dim)
    t2a_mat = np.identity(dim)
    g_audio_a2t = gen_batch(audio_a2t_copy)
    g_audio_t2a = gen_batch(audio_t2a_copy)
    g_text_a2t = gen_batch(text_a2t_copy)
    g_text_t2a = gen_batch(text_t2a_copy)
    for i in range(100000):
        batch_a2t_audio = next(g_audio_a2t)
        batch_a2t_text = next(g_text_a2t)
        batch_t2a_audio = next(g_audio_t2a)
        batch_t2a_text = next(g_text_t2a)
        train_core.train(
            t2a_text=batch_t2a_text,
            t2a_audio=batch_t2a_audio,
            a2t_text=batch_a2t_text,
            a2t_audio=batch_a2t_audio,
            lr=FLAG.lr,
            epoch=1,
        )
    tmp = train_core.get_matrix()
    a2t_mat = np.reshape(np.array(tmp[0]), (dim, dim))
    t2a_mat = np.reshape(np.array(tmp[1]), (dim, dim))
    return a2t_mat, t2a_mat
def generate_training_set(path, colNum=7):
    if path[-1] != '/':
        path = path + '/'
    subProcCmd = "{:>s}*.DUMP".format(path)
    dumpFile = list_files(subProcCmd)
    subProcCmd = "{:>s}*.DAT".format(path)
    snFileList = list_files(subProcCmd)
    snGenType = pd.read_csv(dumpFile[0],
                            sep=' ',
                            skiprows=0,
                            header=1,
                            usecols=[colNum],
                            skipinitialspace=True,
                            engine='c')
    # permutation returns a shuffled copy; np.shuffle does not exist
    shuffIdxArray = np.random.permutation(
        np.arange(len(np.where(snGenType['GENTYPE'] == 1)[0])))
def initialize_centroids(points, k):
    """Return k random points as the initial centroids (Forgy init)."""
    # copy first so the caller's array is not reordered in place
    centroids = points.copy()
    np.random.shuffle(centroids)
    return centroids[:k]
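# A minimal usage sketch of the function above inside one k-means assignment
# step (the data values are illustrative):
import numpy as np

points = np.random.rand(200, 2)
centroids = initialize_centroids(points, k=3)
# index of the nearest centroid for every point
labels = np.argmin(
    np.linalg.norm(points[:, None] - centroids[None], axis=2), axis=1)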
def on_epoch_end(self):
    """ Shuffle data ids to load in the next epoch """
    self.ids = os.listdir(self.data_dir)
    # shuffle in place; np.random.shuffle returns None, so don't reassign
    np.random.shuffle(self.ids)
%timeit np.random.normal(size=N)
from random import normalvariate
%timeit samples = [normalvariate(0, 1) for _ in range(N)]
np.random.permutation(1000)
np.random.shuffle(np.arange(1000))
a = np.random.shuffle(np.arange(1000))
a
print(a)
np.random.shuffle([1, 2, 3])
nsteps = 10000
draws = np.random.randint(0, 2, size=nsteps)
steps = np.where(draws > 0, 1, -1)
walk = steps.cumsum()
walk.min()
walk.max()
plt.plot(walk)
walk.sum()
walk.mean()
walk
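# The session above hinges on two NumPy facts worth making explicit:
# np.random.shuffle reorders its argument in place and returns None, while
# np.random.permutation returns a new shuffled array (or a shuffled
# arange(n) when given an int). There is no np.shuffle.
import numpy as np

a = np.arange(5)
result = np.random.shuffle(a)  # a is now shuffled ...
print(result)                  # ... but the return value is None
b = np.random.permutation(5)   # fresh shuffled array; nothing mutated
print(b)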