def create_batches(self, train_file, batch_size, sequence_length): self.x_data = [] self.y_data = [] padding_index = self.vocab_size - 1 for line in open(train_file): line = line.decode('utf-8').replace('\n', '') text, label = line.strip().split('\t') tokens = fool.cut(re.sub(r'\w+', ' L', text)) seq_ids = [self.token_dictionary.get(token) for token in tokens[0] if token not in self.stop_words and self.token_dictionary.get(token) is not None and not chinese.is_other_all(token)] seq_ids = seq_ids[:sequence_length] for _ in range(len(seq_ids), sequence_length): seq_ids.append(padding_index) self.x_data.append(seq_ids) self.y_data.append(self.label_dictionary.get(label)) self.num_batches = int(len(self.x_data) / batch_size) self.x_data = self.x_data[:self.num_batches * batch_size] self.y_data = self.y_data[:self.num_batches * batch_size] self.x_data = np.array(self.x_data, dtype=int) self.y_data = np.array(self.y_data, dtype=int) self.x_batches = np.split(self.x_data.reshape(batch_size, -1), self.num_batches, 1) self.y_batches = np.split(self.y_data.reshape(batch_size, -1), self.num_batches, 1) self.pointer = 0
def gridify_image(image, grid):
    '''
    Extracts a grid of sub-images of a 01c image, as a b01c batch.
    '''
    assert_equal(image.ndim, 3)  # a single image in 01c format.

    grid = numpy.asarray(grid, dtype=int)
    assert_true(numpy.all(grid > 0))

    # Trim off excess rows and columns
    image_shape = numpy.asarray(image.shape[:2])
    trimmed_shape = image_shape - image_shape % grid
    image = image[:trimmed_shape[0], :trimmed_shape[1], :]

    # b01c
    image = image[numpy.newaxis, ...]

    grid_cells = []
    grid_rows = numpy.split(image, grid[0], axis=1)
    for grid_row in grid_rows:
        row_cells = numpy.split(grid_row, grid[1], axis=2)
        grid_cells.extend(row_cells)

    # concatenate grid_cells along batch axis
    return numpy.concatenate(grid_cells, axis=0)
def segments(self): if self.a and self.b: assert self.a.data_high == self.b.data_high assert self.a.rate == self.b.rate a = self.a.data_wider() b = self.b.data_wider() offset = self.offset with warnings.catch_warnings(): # Changing handling of empty arrays not relevant to us warnings.simplefilter("ignore", FutureWarning) acs = np.split(a, (max(0, offset), len(b)+offset)) bcs = np.split(b, (max(0, -offset), len(a)-offset)) for i, (ac, bc) in enumerate(zip(acs, bcs)): if i == 1: assert len(ac) == len(bc) yield Segment( ac, bc, self.a.data_high, self.a.rate, min(a.size, b.size)) # matches total in cmp_track elif len(ac): assert not len(bc) # careful with np.zeros type yield Segment(ac, ac*0, self.a.data_high, self.a.rate, ac.size, padding="-") elif len(bc): yield Segment(bc*0, bc, self.a.data_high, self.a.rate, bc.size, padding="+") elif self.a: ac = self.a.data_wider() yield Segment(ac, ac*0, self.a.data_high, self.a.rate, ac.size, padding="<") elif self.b: bc = self.b.data_wider() yield Segment(bc*0, bc, self.b.data_high, self.b.rate, bc.size, padding=">")
def _read_tile(self, filename):
    with open(filename, "r") as tilefile:
        # this is reversed from the fortran b/c "in" is a reserved word
        self.ni, self.nj, self.nk = np.fromfile(tilefile, dtype="int32", count=3, sep=" ")
        raw_data = np.genfromtxt(tilefile,
                                 dtype=("int32", "float64", "float64", "float64", "float64"),
                                 names=("idx", "a", "b", "vla", "vlb"))
        self.ii, self.ij, self.ik = np.split(raw_data["idx"], [self.ni, self.ni + self.nj])
        self.x1a, self.x2a, self.x3a = np.split(raw_data["a"], [self.ni, self.ni + self.nj])
        self.x1b, self.x2b, self.x3b = np.split(raw_data["b"], [self.ni, self.ni + self.nj])
        self.vl1a, self.vl2a, self.vl3a = np.split(raw_data["vla"], [self.ni, self.ni + self.nj])
        self.vl1b, self.vl2b, self.vl3b = np.split(raw_data["vlb"], [self.ni, self.ni + self.nj])
    return
def drop_samples(game, prob): """Drop samples from a sample game Samples are dropped independently with probability prob.""" sample_map = {} for prof, pays in zip(np.split(game.profiles, game.sample_starts[1:]), game.sample_payoffs): num_profiles, _, num_samples = pays.shape perm = rand.permutation(num_profiles) prof = prof[perm] pays = pays[perm] new_samples, counts = np.unique( rand.binomial(num_samples, prob, num_profiles), return_counts=True) splits = counts[:-1].cumsum() for num, prof_samp, pay_samp in zip( new_samples, np.split(prof, splits), np.split(pays, splits)): if num == 0: continue prof, pays = sample_map.setdefault(num, ([], [])) prof.append(prof_samp) pays.append(pay_samp[..., :num]) if sample_map: profiles = np.concatenate(list(itertools.chain.from_iterable( x[0] for x in sample_map.values())), 0) sample_payoffs = tuple(np.concatenate(x[1]) for x in sample_map.values()) else: # No data profiles = np.empty((0, game.num_role_strats), dtype=int) sample_payoffs = [] return rsgame.samplegame_copy(game, profiles, sample_payoffs, False)
def split_data(chars, batch_size, num_steps, split_frac=0.9): """ Split character data into training and validation sets, inputs and targets for each set. Arguments --------- chars: character array batch_size: Size of examples in each of batch num_steps: Number of sequence steps to keep in the input and pass to the network split_frac: Fraction of batches to keep in the training set Returns train_x, train_y, val_x, val_y """ slice_size = batch_size * num_steps n_batches = int(len(chars) / slice_size) # Drop the last few characters to make only full batches x = chars[: n_batches * slice_size] y = chars[1: n_batches * slice_size + 1] # Split the data into batch_size slices, then stack them into a 2D matrix x = np.stack(np.split(x, batch_size)) y = np.stack(np.split(y, batch_size)) # Now x and y are arrays with dimensions batch_size x n_batches*num_steps # Split into training and validation sets, keep the first split_frac batches for training split_idx = int(n_batches * split_frac) train_x, train_y = x[:, :split_idx * num_steps], y[:, :split_idx * num_steps] val_x, val_y = x[:, split_idx * num_steps:], y[:, split_idx * num_steps:] return train_x, train_y, val_x, val_y
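# Hedged usage sketch (not from the original source): exercising split_data above
# on a toy character array. The only assumption is that len(chars) is at least
# n_batches * slice_size + 1 so the shifted targets can be built.
import numpy as np

chars = np.arange(105)  # stand-in for an encoded character array
train_x, train_y, val_x, val_y = split_data(chars, batch_size=5, num_steps=4, split_frac=0.8)
print(train_x.shape, val_x.shape)  # expected: (5, 16) (5, 4)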
def test_stratified_batches(): data = np.array([('a', -1), ('b', 0), ('c', 1), ('d', -1), ('e', -1)], dtype=[('x', np.str_, 8), ('y', np.int32)]) assert list(data['x']) == ['a', 'b', 'c', 'd', 'e'] assert list(data['y']) == [-1, 0, 1, -1, -1] batch_generator = training_batches(data, batch_size=3, n_labeled_per_batch=1) first_ten_batches = list(islice(batch_generator, 10)) labeled_batch_portions = [batch[:1] for batch in first_ten_batches] unlabeled_batch_portions = [batch[1:] for batch in first_ten_batches] labeled_epochs = np.split(np.concatenate(labeled_batch_portions), 5) unlabeled_epochs = np.split(np.concatenate(unlabeled_batch_portions), 4) assert ([sorted(items['x'].tolist()) for items in labeled_epochs] == [['b', 'c']] * 5) assert ([sorted(items['y'].tolist()) for items in labeled_epochs] == [[0, 1]] * 5) assert ([sorted(items['x'].tolist()) for items in unlabeled_epochs] == [['a', 'b', 'c', 'd', 'e']] * 4) assert ([sorted(items['y'].tolist()) for items in unlabeled_epochs] == [[-1, -1, -1, -1, -1]] * 4)
def _feed_dict(self, train_batch, is_training=True): pred_polys = train_batch['raw_polys'] * np.expand_dims(train_batch['masks'], axis=2) # (seq,batch,2) pred_polys = np.transpose(pred_polys, [1, 0, 2]) # (batch,seq,2) pred_mask = np.transpose(train_batch['masks'], [1, 0]) # (batch_size,seq_len) cnn_feats = train_batch['cnn_feats'] # (batch_size, 28, 28, 128) cells_1 = np.stack([np.split(train_batch['hiddens_list'][-1][0], 2, axis=3)[0]], axis=1) cells_2 = np.stack([np.split(train_batch['hiddens_list'][-1][1], 2, axis=3)[0]], axis=1) pred_mask_imgs = self.draw_mask(28, 28, pred_polys, pred_mask) if is_training: raise NotImplementedError() r = { self._ph.cells_1: cells_1, self._ph.cells_2: cells_2, self._ph.pred_mask_imgs: pred_mask_imgs, self._ph.cnn_feats: cnn_feats, self._ph.predicted_mask: pred_mask, self._ph.pred_polys: pred_polys, self._ph.ious: self._zero_batch } return r
def update_h(sigma2, phi, y, mu, psi): """Updates the hidden variables using updated parameters. This is an implementation of the equation: .. math:: \\hat{h} = (\\sigma^2 I + \\sum_{n=1}^N \\Phi_n^T A^T A \\Phi_n)^{-1} \\sum_{n=1}^N \\Phi_n^T A^T (y_n - A \\mu_n - b) """ N = y.shape[0] K = phi.shape[1] A = psi.params[:2, :2] b = psi.translation partial_0 = 0 for phi_n in np.split(phi, N, axis=0): partial_0 += phi_n.T @ A.T @ A @ phi_n partial_1 = sigma2 * np.eye(K) + partial_0 partial_2 = np.zeros((K, 1)) for phi_n, y_n, mu_n in zip(np.split(phi, N, axis=0), y, mu.reshape(-1, 2)): partial_2 += phi_n.T @ A.T @ (y_n - A @ mu_n - b).reshape(2, -1) return np.linalg.inv(partial_1) @ partial_2
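# Hedged, self-contained check of update_h above on synthetic data. The psi object
# is a stand-in (hypothetical) exposing only the two attributes the function reads:
# params (at least 2x2) and translation.
import numpy as np
from types import SimpleNamespace

N, K = 4, 3
rng = np.random.default_rng(0)
phi = rng.standard_normal((2 * N, K))   # N blocks of shape (2, K)
y = rng.standard_normal((N, 2))
mu = rng.standard_normal((N, 2))
psi = SimpleNamespace(params=np.eye(2), translation=np.zeros(2))

h_hat = update_h(sigma2=0.5, phi=phi, y=y, mu=mu, psi=psi)
print(h_hat.shape)  # expected: (K, 1)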
def read_dataset(train_size, scale=False, normalize=False): logging.info('fetching the dataset') # d = sklearn.datasets.load_diabetes() # 糖尿病 #d = sklearn.datasets.load_boston() # ボストン住宅価格 # data = d['data'].astype(np.float32) target = d['target'].astype(np.float32).reshape(len(d['target']), 1) #"Chainerのmnist.pyだと下記ののような書き方になっているが、ミニバッチの数が2以上だと動かない"らしい #target = diabetes['target'].astype(np.float32) # 本来訓練データで標準化・正規化して、そのパラメータをテストデータに適用すべき if normalize and scale: raise Exception('both normalize and scale can not be True') if normalize: data = preprocessing.normalize(data) target = preprocessing.normalize(target) if scale: data = preprocessing.scale(data) target = preprocessing.scale(target) # 分割 x_train, x_test = np.split(data, [train_size]) y_train, y_test = np.split(target, [train_size]) assert len(x_train)==len(y_train) assert len(x_test)==len(y_test) return ((x_train, y_train), (x_test, y_test), {"SHAPE_TRAIN_X":x_train.shape, "SHAPE_TRAIN_Y":y_train.shape, "SHAPE_TEST_X":x_test.shape, "SHAPE_TEST_Y":y_test.shape, })
def split_dataset(dataset, N=4000):
    perm = np.random.permutation(len(dataset['target']))
    dataset['data'] = dataset['data'][perm]
    dataset['target'] = dataset['target'][perm]
    x_train, x_test = np.split(dataset['data'], [N])
    y_train, y_test = np.split(dataset['target'], [N])
    return x_train, y_train, x_test, y_test
def spiralroll(B, orient=1):
    '''undo spiral flatten'''
    k = int(np.sqrt(B.size))
    if k**2 - B.size != 0:
        print('ERR: unable to form a square 2D array!')
    else:
        C = np.copy(B)
        C = C[::-1]
        if k % 2:
            A, C = np.split(C, [1])
            A = A.reshape(1, 1)
            start = 2
        else:
            A, C = np.split(C, [4])
            A = A[::-1].reshape(2, 2)
            A[-1] = A[-1, ::-1]
            start = 3
        for ix in range(start, k, 2):
            A = np.pad(A, ((1, 1), (1, 1)), mode='constant')
            C1, C2, C3, C4, C = np.split(C, [ix, ix*2, ix*3, ix*4])
            A[1:, 0] = C1
            A[-1, 1:] = C2
            A[-2::-1, -1] = C3
            A[0, -2::-1] = C4
        if orient == 0:  # compare by value; 'is 0' relies on CPython int interning
            A = A.T
        return A
def get_train_data(self, label_types):
    labeled_images = self.get_labeled_images()
    x_train_all = np.asarray([
        labeled_image_file.get_image() for labeled_image_file in labeled_images
    ])
    y_train_all = np.asarray([
        label_to_output(labeled_image_file.get_label(), label_types)
        for labeled_image_file in labeled_images
    ])
    length = len(labeled_images)

    # Shuffle the original data randomly
    indexes = np.random.permutation(length)
    x_train_all_rand = x_train_all[indexes]
    y_train_all_rand = y_train_all[indexes]

    # Subtract the mean image
    mean = self.get_mean_image()
    if mean is not None:
        x_train_all_rand -= mean

    # Normalize (applied to the shuffled array that is actually used below)
    x_train_all_rand /= 255

    # Use one fifth of the data for testing
    data_size = length * 4 // 5  # integer index for np.split
    x_train, x_test = np.split(x_train_all_rand, [data_size])
    y_train, y_test = np.split(y_train_all_rand, [data_size])
    return x_train, x_test, y_train, y_test
def make_batch(self):
    # make datasets
    x_dataset, y_dataset = ps.make_sente_datasets(1, 100)
    #print(x_dataset[110])
    #print(y_dataset[110])
    x_dataset = np.asarray(x_dataset)
    y_dataset = np.asarray(y_dataset)
    nb_data = x_dataset.shape[0]
    split_at = int(nb_data * 0.9)  # np.split requires an integer index
    x_train, x_test = np.split(x_dataset, [split_at])
    y_train, y_test = np.split(y_dataset, [split_at])
    #x_train = x_train.reshape(x_train.shape[0], 1, 15, 9)
    #x_test = x_test.reshape(x_test.shape[0], 1, 15, 9)
    x_train = x_train.reshape(x_train.shape[0], 1, 11, 9)
    x_test = x_test.reshape(x_test.shape[0], 1, 11, 9)
    y_train = np_utils.to_categorical(y_train, nb_classes)
    y_test = np_utils.to_categorical(y_test, nb_classes)
    print("x_train shape:", x_train.shape)
    print(x_train.shape[0], "train samples")
    print(x_test.shape[0], "test samples")
    return x_train, y_train, x_test, y_test
def split_data(X,Y,degree): Testing_error =[] #all the testing errors of 10 fold cross validations Training_error = [] #all the training errors of 10 fold cross validations X_sets = np.split(X,10) Y_sets = np.split(Y,10) for i in range(len(X_sets)): X_test =np.vstack( X_sets[i]) Y_test = np.vstack(Y_sets[i]) if i<len(X_sets)-1: X_train = np.vstack(X_sets[i+1:]) Y_train =np.vstack(Y_sets[i+1:]) elif i==len(X_sets)-1 : X_train = np.vstack(X_sets[:i]) Y_train = np.vstack(Y_sets[:i]) while i>0: tempX = np.vstack(X_sets[i-1]) X_train = np.append(tempX,X_train) tempY = np.vstack(Y_sets[i-1]) Y_train = np.append(tempY,Y_train) i = i-1 X_train = np.vstack(X_train) Y_train = np.vstack(Y_train) Z_train,theta,Z_test = polynomial_withCV(X_train,Y_train,degree,X_test) Testing_error.append( mse(Z_test,theta,Y_test)) Training_error.append(mse(Z_train,theta,Y_train)) return sum(Testing_error),sum(Training_error)
def reconstruct2(self, nl, l1, l2):
    """To reconstruct a Python list of lists / numpy arrays.

    Inverse operation of FLATTEN() above.
    Usage: L_reconstructed = reconstruct2(L_flat, l1, l2)
    Source: http://stackoverflow.com/questions/27982432/flattening-and-unflattening-a-nested-list-of-numpy-arrays
    """
    return np.split(np.split(nl, np.cumsum(l1)), np.cumsum(l2))[:-1]
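# Hedged illustration (not from the original source) of the np.split/np.cumsum
# unflatten idiom that reconstruct2 relies on: splitting a flat array back into
# pieces whose lengths are known.
import numpy as np

lengths = [3, 2, 4]
flat = np.arange(sum(lengths))
pieces = np.split(flat, np.cumsum(lengths)[:-1])
print([p.tolist() for p in pieces])  # [[0, 1, 2], [3, 4], [5, 6, 7, 8]]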
def conf2yap(conf_fname, yap_filename): print("Yap file : ", yap_filename) positions, radii, meta = clff.read_conf_file(conf_fname) positions[:, 0] -= float(meta['lx'])/2 positions[:, 1] -= float(meta['ly'])/2 positions[:, 2] -= float(meta['lz'])/2 if 'np_fixed' in meta: # for conf with fixed particles split_line = len(positions) - int(meta['np_fixed']) pos_mobile, pos_fixed = np.split(positions, [split_line]) rad_mobile, rad_fixed = np.split(radii, [split_line]) yap_out = pyp.layer_switch(3) yap_out = pyp.add_color_switch(yap_out, 3) yap_out = np.row_stack((yap_out, particles_yaparray(pos_mobile, rad_mobile))) yap_out = pyp.add_layer_switch(yap_out, 4) yap_out = pyp.add_color_switch(yap_out, 4) yap_out = np.row_stack((yap_out, particles_yaparray(pos_fixed, rad_fixed))) else: yap_out = pyp.layer_switch(3) yap_out = pyp.add_color_switch(yap_out, 3) yap_out = np.row_stack((yap_out, particles_yaparray(positions, radii))) pyp.savetxt(yap_filename, yap_out)
def create_batches(self,samples): sample_size = len(samples) self.num_batches = math.ceil(sample_size /self.batch_size) new_sample_size = self.num_batches * self.batch_size # Create the batch tensor # x_lengths = [len(sample) for sample in samples] x_lengths = [] x_seqs = np.ndarray((new_sample_size,self.seq_max_length),dtype=np.int32) y_seqs = np.ndarray((new_sample_size,self.seq_max_length),dtype=np.int32) self.x_lengths = [] for i,sample in enumerate(samples): # fill with padding to align batchSize samples into one 2D list x_lengths.append(len(sample)) x_seqs[i] = sample + [self.padToken] * (self.seq_max_length - len(sample)) for i in range(sample_size,new_sample_size): copyi = i - sample_size x_seqs[i] = x_seqs[copyi] x_lengths.append(x_lengths[copyi]) y_seqs[:,:-1] = x_seqs[:,1:] y_seqs[:,-1] = x_seqs[:,0] x_len_array = np.array(x_lengths) self.x_batches = np.split(x_seqs.reshape(self.batch_size, -1), self.num_batches, 1) self.x_len_batches = np.split(x_len_array.reshape(self.batch_size, -1), self.num_batches, 1) self.y_batches = np.split(y_seqs.reshape(self.batch_size, -1), self.num_batches, 1)
def train_net( net, x_train, y_train, images, b_name='\033[30mbaseline_%s\033[0m', f_name='\033[30mfollow_%s\033[0m', d_name='\033[30mdeformation_%s\033[0m' ): defo = False d_inputs = [] c = color_codes() n_images = len(images) # We try to get the last weights to keep improving the net over and over if isinstance(x_train, tuple): defo = True x_train, defo_train = x_train defo_train = np.split(defo_train, len(images), axis=1) d_inputs = [(d_name % im, np.squeeze(d_im)) for im, d_im in zip(images, defo_train)] print(c['c'] + '[' + strftime("%H:%M:%S") + '] ' + c['g'] + 'Training' + c['nc']) n_channels = x_train.shape[1] x_train = np.split(x_train, n_channels, axis=1) b_inputs = [(b_name % im, x_im) for im, x_im in zip(images, x_train[:n_images])] f_inputs = [(f_name % im, x_im) for im, x_im in zip(images, x_train[n_images:])] inputs = dict(b_inputs + f_inputs) if not defo else dict(b_inputs + f_inputs + d_inputs) net.fit(inputs, y_train)
def blocksort2D(sfield, ofield, db):
    """
    Takes two nx x ny fields and divides them into blocks - the new fields have
    dimensions nx' x ny' where nx' = nx/db, ny' = ny/db.
    db is the block width in number of grid cells.
    The fields are averaged over the block area (db x db points) and then ofield
    is sorted according to sfield (spatial structure is lost).
    The returned value is an ordered dictionary with sfield values as keys and
    ofield values as values.
    Assumes nx = ny = even integer; nx must be a multiple of db.
    """
    nx = sfield.shape[0]
    ny = sfield.shape[1]
    nxblock = nx // db  # integer number of blocks for np.split
    nyblock = ny // db
    #tave_field = np.mean(field[ti-ntave:ti,:,:])
    #tave_field = np.squeeze(tave_field)
    # Split up the field column-wise and take the average row-wise, then split up
    # the resulting field row-wise and take the average column-wise.
    blocksfield = np.average(np.split(np.average(np.split(sfield, nxblock, axis=1), axis=-1), nyblock, axis=1), axis=-1)
    blockofield = np.average(np.split(np.average(np.split(ofield, nxblock, axis=1), axis=-1), nyblock, axis=1), axis=-1)
    blocksfield = blocksfield.flatten()
    blockofield = blockofield.flatten()
    d = dict(zip(blocksfield, blockofield))
    od = collections.OrderedDict(sorted(d.items()))
    return od
def blockave2D(field, db):
    """
    Takes a nx x ny field and divides the field into blocks - the new field has
    dimensions nx' x ny' where nx' = nx/db, ny' = ny/db.
    db is the block width in number of grid cells.
    The field is averaged over the block area (db x db points); for a 3D field,
    averaging is only performed in the horizontal direction.
    Assumes nx = ny = even integer; nx must be a multiple of db.
    """
    nx = field.shape[0]
    ny = field.shape[1]
    nxblock = nx // db
    nyblock = ny // db
    # Split up the field column-wise and take the average row-wise, then split up
    # the resulting field row-wise and take the average column-wise.
    blockfield = np.average(np.split(np.average(np.split(field, nxblock, axis=1), axis=-1), nyblock, axis=1), axis=-1)
    return blockfield
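# Hedged usage sketch (not from the original source): blockave2D above on a small
# 4 x 4 field with db=2 returns a 2 x 2 array of block means.
import numpy as np

field = np.arange(16, dtype=float).reshape(4, 4)
print(blockave2D(field, 2))
# each entry is the mean of one 2 x 2 block:
# [[ 2.5  4.5]
#  [10.5 12.5]]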
def gradient_p(X, y, theta, alpha, m, numIterations):
    errors1_x1 = 0
    errors1_x2 = 0
    errors2_x1 = 0
    errors2_x2 = 0
    x1, x2 = np.split(X, 2)
    y1, y2 = np.split(y, 2)
    for i in range(0, numIterations):
        h1 = x1.dot(theta)
        errors1_x1 = (h1 - y1) * x1[:, 0]
        errors1_x2 = (h1 - y1) * x1[:, 1]
        h2 = x2.dot(theta)
        errors2_x1 = (h2 - y2) * x2[:, 0]
        errors2_x2 = (h2 - y2) * x2[:, 1]
        theta[0] = theta[0] - (alpha / m) * (errors1_x1.sum() + errors2_x1.sum())
        theta[1] = theta[1] - (alpha / m) * (errors1_x2.sum() + errors2_x2.sum())
    return theta
def split_x(x, split_pos): # NOTE: do not support multiple sentence tensors # sequence input , non-sequence input, and no non-sequence input # sequence input: if type(x) is not list: x=[x] if len(x) == 1: # sec1, sec2, sec3,... # sent1, sent2, sent5 x01, x02 = tuple(np.split(x[0],[split_pos])) cond_list=[x02>=0,x02<0] offset = x02[0][0] choice_list=[x02-offset, x02 ] x02 = np.select(cond_list, choice_list) return ([x01],[x02]) # doc1 doc2 doc3 # sec1 sec2 ... # sec1, sec2, ... # sent1, sent2, ... x01, x02 = tuple(np.split(x[0], [split_pos])) offset = x02[0][0] x1, x2 = split_x(x[1:], offset) cond_list = [x02 >= 0, x02 < 0] choice_list = [x02 - offset, x02] x02 = np.select(cond_list, choice_list) return ([x01] + x1, [x02]+x2)
def k_fold_cross_validation_sets(X, y, k, shuffle=True): if shuffle: X, y = shuffle_data(X, y) n_samples = len(y) left_overs = {} n_left_overs = (n_samples % k) if n_left_overs != 0: left_overs["X"] = X[-n_left_overs:] left_overs["y"] = y[-n_left_overs:] X = X[:-n_left_overs] y = y[:-n_left_overs] X_split = np.split(X, k) y_split = np.split(y, k) sets = [] for i in range(k): X_test, y_test = X_split[i], y_split[i] X_train = np.concatenate(X_split[:i] + X_split[i + 1:], axis=0) y_train = np.concatenate(y_split[:i] + y_split[i + 1:], axis=0) sets.append([X_train, X_test, y_train, y_test]) # Add left over samples to last set as training samples if n_left_overs != 0: np.append(sets[-1][0], left_overs["X"], axis=0) np.append(sets[-1][2], left_overs["y"], axis=0) return np.array(sets)
def to_json(self): base = super().to_json() base['offsets'] = self.payoff_to_json(self._offset) base['coefs'] = self.payoff_to_json(self._coefs) lengths = {} for role, strats, lens in zip( self.role_names, self.strat_names, np.split(self._lengths, self.role_starts[1:])): lengths[role] = {s: self.payoff_to_json(l) for s, l in zip(strats, lens)} base['lengths'] = lengths profs = {} for role, strats, data in zip( self.role_names, self.strat_names, np.split(np.split(self._profiles, self._size_starts[1:]), self.role_starts[1:])): profs[role] = {strat: [self.profile_to_json(p) for p in dat] for strat, dat in zip(strats, data)} base['profiles'] = profs alphas = {} for role, strats, alphs in zip( self.role_names, self.strat_names, np.split(np.split(self._alpha, self._size_starts[1:]), self.role_starts[1:])): alphas[role] = {s: a.tolist() for s, a in zip(strats, alphs)} base['alphas'] = alphas base['type'] = 'rbf.1' return base
def make_predictions(net, data, labels, num_classes): data = np.require(data, requirements='C') labels = np.require(labels, requirements='C') preds = np.zeros((data.shape[1], num_classes), dtype=np.single) softmax_idx = net.get_layer_idx('probs', check_type='softmax') t0 = time.time() net.libmodel.startFeatureWriter( [data, labels, preds], softmax_idx) net.finish_batch() print "Predicted %s cases in %.2f seconds." % ( labels.shape[1], time.time() - t0) if net.multiview_test: # We have to deal with num_samples * num_views # predictions. num_views = net.test_data_provider.num_views num_samples = labels.shape[1] / num_views split_sections = range( num_samples, num_samples * num_views, num_samples) preds = np.split(preds, split_sections, axis=0) labels = np.split(labels, split_sections, axis=1) preds = reduce(np.add, preds) labels = labels[0] return preds, labels
def update_stipples(self, cells): """ Updates stipple locations from an image cells should be an image of the same size as self.img with pixel values representing which Voronoi cell that pixel falls into """ indices = np.argsort(cells.flat) _, boundaries = np.unique(cells.flat[indices], return_index=True) gxs = np.split(self.gx.flat[indices], boundaries)[1:] gys = np.split(self.gy.flat[indices], boundaries)[1:] gws = np.split(1 - self.img.flat[indices], boundaries)[1:] w = self.img.shape[1] / 2.0 h = self.img.shape[0] / 2.0 for i, (gx, gy, gw) in enumerate(zip(gxs, gys, gws)): weight = np.sum(gw) if weight > 0: x = np.sum(gx * gw) / weight y = np.sum(gy * gw) / weight self.stipples[i,:] = [(x - w) / w, (y - h) / h] else: self.stipples[i,:] = np.random.uniform(-1, 1, size=2)
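# Hedged illustration (not from the original source) of the argsort/unique/split
# group-by idiom used in update_stipples above: gathering the values that fall
# into each cell without an explicit Python loop over cells.
import numpy as np

cells = np.array([2, 0, 1, 0, 2])
values = np.array([10., 20., 30., 40., 50.])
order = np.argsort(cells)
_, bounds = np.unique(cells[order], return_index=True)
groups = np.split(values[order], bounds)[1:]
print([g.tolist() for g in groups])  # [[20.0, 40.0], [30.0], [10.0, 50.0]]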
def generate_svm():
    digits, labels = load_digits(DIGITS_FN)

    print('preprocessing...')
    # shuffle digits
    rand = np.random.RandomState(321)
    shuffle = rand.permutation(len(digits))
    digits, labels = digits[shuffle], labels[shuffle]

    digits2 = list(map(deskew, digits))
    samples = preprocess_hog(digits2)

    train_n = int(0.9 * len(samples))
    cv2.imshow('test set', mosaic(25, digits[train_n:]))
    digits_train, digits_test = np.split(digits2, [train_n])
    samples_train, samples_test = np.split(samples, [train_n])
    labels_train, labels_test = np.split(labels, [train_n])

    print('training SVM...')
    model = SVM(C=2.67, gamma=5.383)
    model.train(samples_train, labels_train)
    vis = evaluate_model(model, digits_test, samples_test, labels_test)
    print('saving SVM as "digits_svm.dat"...')
    cv2.waitKey(0)  # was unreachable after the return; wait before returning
    return model
def train(self, trainfile_name): train_X, train_Y, num_classes = self.make_data(trainfile_name) accuracies = [] fscores = [] if self.cv: num_points = train_X.shape[0] fol_len = num_points / self.folds rem = num_points % self.folds X_folds = numpy.split(train_X, self.folds) if rem == 0 else numpy.split(train_X[:-rem], self.folds) Y_folds = numpy.split(train_Y, self.folds) if rem == 0 else numpy.split(train_Y[:-rem], self.folds) for i in range(self.folds): train_folds_X = [] train_folds_Y = [] for j in range(self.folds): if i != j: train_folds_X.append(X_folds[j]) train_folds_Y.append(Y_folds[j]) train_fold_X = numpy.concatenate(train_folds_X) train_fold_Y = numpy.concatenate(train_folds_Y) classifier = self.fit_model(train_fold_X, train_fold_Y, num_classes) predictions = self.classify(classifier, X_folds[i]) accuracy, weighted_fscore, _ = self.evaluate(Y_folds[i], predictions) accuracies.append(accuracy) fscores.append(weighted_fscore) accuracies = numpy.asarray(accuracies) fscores = numpy.asarray(fscores) print >>sys.stderr, "Accuracies:", accuracies print >>sys.stderr, "Average: %0.4f (+/- %0.4f)"%(accuracies.mean(), accuracies.std() * 2) print >>sys.stderr, "Fscores:", fscores print >>sys.stderr, "Average: %0.4f (+/- %0.4f)"%(fscores.mean(), fscores.std() * 2) self.classifier = self.fit_model(train_X, train_Y, num_classes) cPickle.dump(classifier, open(self.trained_model_name, "wb")) #pickle.dump(tagset, open(self.stored_tagset, "wb")) print >>sys.stderr, "Done"
def main(): parser = argparse.ArgumentParser() parser.add_argument('--checkpoint_dir', type=str, default='checkpoints', help='checkpoint directory') parser.add_argument('--save_every', type=int, default=1000, help='save frequency') args = parser.parse_args() # Read the training data inputFile = open("data/input.txt","rU") trainingData = inputFile.read() # Count vocab counter = collections.Counter(trainingData) count_pairs = sorted(counter.items(), key=lambda x: -x[1]) chars, _ = list(zip(*count_pairs)) vocabSize = len(chars) print vocabSize vocab = dict(zip(chars, range(len(chars)))) inputTensor = np.array(map(vocab.get, trainingData)) numBatches = inputTensor.size / (batchSize * numSteps) print numBatches inputTensor = inputTensor[:numBatches * batchSize * numSteps] inData = inputTensor targetData = np.copy(inputTensor) targetData[:-1] = inData[1:] targetData[-1] = inData[0] inDataBatches = np.split(inData.reshape(batchSize, -1), numBatches, 1) targetDataBatches = np.split(targetData.reshape(batchSize, -1), numBatches, 1) lstmTrain(args)
def get_medium_estimator(self, measurements): """ """ num_of_mediums = self.args.num_mediums cv_index = self.args.use_cross_validation time_list = measurements.time_list if cv_index >= 0: time_list = np.delete(time_list, cv_index) assert isinstance(num_of_mediums, int) and num_of_mediums <= len(time_list) wavelength = measurements.wavelength if not isinstance(wavelength,list): wavelength = [wavelength] # Define the grid for reconstruction grid = albedo_grid = phase_grid = shdom.Grid(bounding_box=measurements.bb,nx=self.args.nx,ny=self.args.ny,nz=self.args.nz) if self.args.assume_moving_cloud: cloud_velocity = None else: cloud_velocity = [0,0,0] # Find a cloud mask for non-cloudy grid points dynamic_carver = shdom.DynamicSpaceCarver(measurements) mask_list, dynamic_grid, cloud_velocity = dynamic_carver.carve(grid, agreement=0.70, time_list = measurements.time_list, thresholds=self.args.radiance_threshold, vx_max = 10, vy_max=10, gt_velocity = cloud_velocity) mask = mask_list[0] show_mask=1 if show_mask: a = mask.data.astype(int) shdom.cloud_plot(a) print(cloud_velocity) print(sum(sum(sum(a)))) table_path = self.args.mie_base_path.replace('<wavelength>', '{}'.format(shdom.int_round(wavelength[0]))) self.cloud_generator.add_mie(table_path) albedo = self.cloud_generator.get_albedo(wavelength[0], [albedo_grid] * num_of_mediums) phase = self.cloud_generator.get_phase(wavelength[0], [phase_grid] * num_of_mediums) # cv_index = self.args.use_cross_validation # if cv_index >= 0: # # del dynamic_grid[cv_index] # # del mask_list[cv_index] # # del albedo[cv_index] # # del phase[cv_index] # time_list = np.delete(measurements.time_list, cv_index) time_list = np.mean(np.split(time_list, num_of_mediums), 1) extinction = shdom.DynamicGridDataEstimator(self.cloud_generator.get_extinction(wavelength, [grid] * num_of_mediums), min_bound=1e-5, max_bound=2e2) kw_optical_scatterer = {"extinction": extinction, "albedo": albedo, "phase": phase} cloud_estimator = shdom.DynamicScattererEstimator(wavelength=wavelength, time_list=time_list, **kw_optical_scatterer) cloud_estimator.set_mask([mask] * num_of_mediums) # Create a medium estimator object (optional Rayleigh scattering) air = self.air_generator.get_scatterer(wavelength) medium_estimator = shdom.DynamicMediumEstimator(cloud_estimator, air, cloud_velocity) return medium_estimator
def split_array(v1, v2, pieces_number):
    res_v1, res_v2 = np.split(v1, pieces_number), np.split(v2, pieces_number)
    for i in range(len(res_v1)):
        if res_v1[i].size > 0:
            yield (res_v1[i], res_v2[i])
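# Hedged usage sketch (not from the original source): split_array above yields
# aligned, non-empty chunks of two equal-length vectors; pieces_number must
# divide the length evenly for np.split to succeed.
import numpy as np

a = np.arange(8)
b = np.arange(8) * 10
for chunk_a, chunk_b in split_array(a, b, 4):
    print(chunk_a.tolist(), chunk_b.tolist())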
def label_ims(ims_batch, labels=None, inverse_normalize=False, normalize=False, clip_flow=10, display_h=128, pad_top=None, clip_norm=None, padding_size=0, padding_color=255, border_size=0, border_color=0, color_space='rgb', combine_from_axis=0, concat_axis=0, interp=cv2.INTER_LINEAR): ''' Displays a batch of matrices as an image. :param ims_batch: n_batches x h x w x c array of images. :param labels: optional labels. Can be an n_batches length list of tuples, floats or strings :param inverse_normalize: boolean to do normalization from [-1, 1] to [0, 255] :param normalize: boolean to normalize any [min, max] to [0, 255] :param clip_flow: float for the min, max absolute flow magnitude to display :param display_h: integer number of pixels for the height of each image to display :param pad_top: integer number of pixels to pad each image at the top with (for more readable labels) :param color_space: string of either 'rgb' or 'ycbcr' to do color space conversion before displaying :param concat_axis: integer axis number to concatenate batch along (default is 0 for rows) :return: ''' if isinstance(ims_batch, np.ndarray) and len( ims_batch.shape) == 3 and ims_batch.shape[-1] == 3: # already an image return ims_batch # transpose the image until batches are in the 0th axis if not combine_from_axis == 0: # compute all remaining axes all_axes = list(range(len(ims_batch.shape))) del all_axes[combine_from_axis] ims_batch = np.transpose(ims_batch, (combine_from_axis, ) + tuple(all_axes)) batch_size = len(ims_batch) # works for lists and np arrays h = ims_batch[0].shape[0] w = ims_batch[0].shape[1] if len(ims_batch[0].shape) == 2: n_chans = 1 else: n_chans = ims_batch[0].shape[-1] if type(labels) == list and len(labels) == 1: # only label the first image labels = labels + [''] * (batch_size - 1) elif labels is not None and not type(labels) == list and not type( labels) == np.ndarray: labels = [labels] * batch_size scale_factor = display_h / float(h) if pad_top: im_h = int(display_h + pad_top) else: im_h = display_h im_w = round(scale_factor * float(w)) # make sure we have a channels dimension if len(ims_batch.shape) < 4: ims_batch = np.expand_dims(ims_batch, 3) if ims_batch.shape[-1] == 2: # assume to be x,y flow; map to color im X_fullcolor = np.concatenate( [ims_batch.copy(), np.zeros(ims_batch.shape[:-1] + (1, ))], axis=3) if labels is not None: labels = [''] * batch_size for i in range(batch_size): X_fullcolor[i], min_flow, max_flow = flow_to_im( ims_batch[i], clip_flow=clip_flow) # also include the min and max flow in the label if labels[i] is not None: labels[i] = '{},'.format(labels[i]) else: labels[i] = '' for c in range(len(min_flow)): labels[i] += '({}, {})'.format(round(min_flow[c], 1), round(max_flow[c], 1)) ims_batch = X_fullcolor.copy() elif ims_batch.shape[-1] > 3: # not an image, probably labels n_labels = ims_batch.shape[-1] cmap = make_cmap_rainbow(n_labels) labels_im = classification_utils.onehot_to_labels( ims_batch, n_classes=ims_batch.shape[-1]) labels_im_flat = labels_im.flatten() labeled_im_flat = np.tile(labels_im_flat[..., np.newaxis], (1, 3)).astype(np.float32) #for ei in range(batch_size): for l in range(n_labels): labeled_im_flat[labels_im_flat == l, :] = cmap[l] ims_batch = labeled_im_flat.reshape((-1, ) + ims_batch.shape[1:-1] + (3, )) elif inverse_normalize: ims_batch = image_utils.inverse_normalize(ims_batch) elif normalize: flattened_dims = np.prod(ims_batch.shape[1:]) X_spatially_flat = np.reshape(ims_batch, (batch_size, -1, n_chans)) X_orig_min = np.min(X_spatially_flat, 
axis=1) X_orig_max = np.max(X_spatially_flat, axis=1) # now actually flatten and normalize across channels X_flat = np.reshape(ims_batch, (batch_size, -1)) if clip_norm is None: X_flat = X_flat - np.tile(np.min(X_flat, axis=1, keepdims=True), (1, flattened_dims)) # avoid dividing by 0 X_flat = X_flat / np.clip( np.tile(np.max(X_flat, axis=1, keepdims=True), (1, flattened_dims)), 1e-5, None) else: X_flat = X_flat - (-float(clip_norm)) # avoid dividing by 0 X_flat = X_flat / (2. * clip_norm) #X_flat = X_flat - np.tile(np.min(X_flat, axis=1, keepdims=True), (1, flattened_dims)) # avoid dividing by 0 #X_flat = X_flat / np.clip(np.tile(np.max(X_flat, axis=1, keepdims=True), (1, flattened_dims)), 1e-5, None) ims_batch = np.reshape(X_flat, ims_batch.shape) ims_batch = np.clip(ims_batch.astype(np.float32), 0., 1.) for i in range(batch_size): if labels is not None and len(labels) > 0: if labels[i] is not None: labels[i] = '{},'.format(labels[i]) else: labels[i] = '' # show the min, max of each channel for c in range(n_chans): labels[i] += '({:.2f}, {:.2f})'.format( round(X_orig_min[i, c], 2), round(X_orig_max[i, c], 2)) else: ims_batch = np.clip(ims_batch, 0., 1.) if color_space == 'ycbcr': for i in range(batch_size): ims_batch[i] = cv2.cvtColor(ims_batch[i], cv2.COLOR_YCR_CB2BGR) if np.max(ims_batch) <= 1.0: ims_batch = ims_batch * 255.0 out_im = [] for i in range(batch_size): # convert grayscale to rgb if needed if len(ims_batch[i].shape) == 2: curr_im = np.tile(np.expand_dims(ims_batch[i], axis=-1), (1, 1, 3)) elif ims_batch.shape[-1] == 1: curr_im = np.tile(ims_batch[i], (1, 1, 3)) else: curr_im = ims_batch[i] # scale to specified display size if not scale_factor == 1: curr_im = cv2.resize(curr_im, None, fx=scale_factor, fy=scale_factor, interpolation=interp) if pad_top: curr_im = np.concatenate([ np.zeros( (pad_top, curr_im.shape[1], curr_im.shape[2])), curr_im ], axis=0) if border_size > 0: # add a border all around the image curr_im = cv2.copyMakeBorder(curr_im, border_size, border_size, border_size, border_size, borderType=cv2.BORDER_CONSTANT, value=border_color) if padding_size > 0 and i < batch_size - 1: # include a border between images padding_shape = list(curr_im.shape[:3]) padding_shape[concat_axis] = padding_size curr_im = np.concatenate( [curr_im, np.ones(padding_shape) * padding_color], axis=concat_axis) out_im.append(curr_im) if display_h > 50: font_size = 15 else: font_size = 10 if concat_axis is not None: out_im = np.concatenate(out_im, axis=concat_axis).astype(np.uint8) else: out_im = np.concatenate(out_im, axis=0).astype(np.uint8) max_text_width = int(17 * display_h / 128.) 
# empirically determined if labels is not None and len(labels) > 0: im_pil = Image.fromarray(out_im) draw = ImageDraw.Draw(im_pil) for i in range(batch_size): if len(labels) > i: # if we have a label for this image if type(labels[i]) == tuple or type(labels[i]) == list: # format tuple or list nicely formatted_text = ', '.join([ labels[i][j].decode('UTF-8') if type(labels[i][j]) == np.unicode_ \ else labels[i][j] if type(labels[i][j]) == str \ else str(round(labels[i][j], 2)) if isinstance(labels[i][j], float) \ else str(labels[i][j]) for j in range(len(labels[i]))]) elif type(labels[i]) == float or type(labels[i]) == np.float32: formatted_text = str(round(labels[i], 2)) # round floats to 2 digits elif isinstance(labels[i], np.ndarray): # assume that this is a 1D array curr_labels = np.squeeze(labels[i]).astype(np.float32) formatted_text = np.array2string(curr_labels, precision=2, separator=',') # ', '.join(['{}'.format( # np.around(labels[i][j], 2)) for j in range(labels[i].size)]) else: formatted_text = '{}'.format(labels[i]) if display_h > 30: # only print label if we have room try: font = ImageFont.truetype('Ubuntu-M.ttf', font_size) except: font = ImageFont.truetype('arial.ttf', font_size) # wrap the text so it fits formatted_text = textwrap.wrap(formatted_text, width=max_text_width) for li, line in enumerate(formatted_text): if concat_axis == 0: draw.text((5, i * im_h + 5 + 14 * li), line, font=font, fill=(50, 50, 255)) elif concat_axis == 1: draw.text((5 + i * im_w, 5 + 14 * li), line, font=font, fill=(50, 50, 255)) out_im = np.asarray(im_pil) # else: # out_im = [im.astype(np.uint8) for im in out_im] # # max_text_width = int(17 * display_h / 128.) # empirically determined # if labels is not None and len(labels) > 0: # for i, im in enumerate(out_im): # im_pil = Image.fromarray(im) # draw = ImageDraw.Draw(im_pil) # # # if len(labels) > i: # if we have a label for this image # if type(labels[i]) == tuple or type(labels[i]) == list: # # format tuple or list nicely # formatted_text = ', '.join([ # labels[i][j].decode('UTF-8') if type(labels[i][j]) == np.unicode_ \ # else labels[i][j] if type(labels[i][j]) == str \ # else str(round(labels[i][j], 2)) if isinstance(labels[i][j], float) \ # else str(labels[i][j]) for j in range(len(labels[i]))]) # elif type(labels[i]) == float or type(labels[i]) == np.float32: # formatted_text = str(round(labels[i], 2)) # round floats to 2 digits # elif isinstance(labels[i], np.ndarray): # # assume that this is a 1D array # curr_labels = np.squeeze(labels[i]).astype(np.float32) # formatted_text = np.array2string(curr_labels, precision=2, separator=',') # # ', '.join(['{}'.format( # # np.around(labels[i][j], 2)) for j in range(labels[i].size)]) # else: # formatted_text = '{}'.format(labels[i]) # # if display_h > 30: # only print label if we have room # try: # font = ImageFont.truetype('Ubuntu-M.ttf', font_size) # except: # font = ImageFont.truetype('arial.ttf', font_size) # # wrap the text so it fits # formatted_text = textwrap.wrap(formatted_text, width=max_text_width) # # for li, line in enumerate(formatted_text): # draw.text((5, 5 + 14 * li), line, font=font, fill=(50, 50, 255)) # im = np.asarray(im_pil) if concat_axis is None: # un-concat the image. faster this way out_im = np.split(out_im, batch_size, axis=combine_from_axis) return out_im
def make_random_iter(self):
    splits = np.arange(self.batch_size, len(self.inputs), self.batch_size)
    np.random.seed(42)
    it = np.split(np.random.permutation(range(len(self.inputs))), splits)[:-1]
    return iter(it)
def unflatten(y, cuts): return np.split(y, cuts) if cuts else y
print __doc__ digits, labels = load_digits(DIGITS_FN) print 'preprocessing...' # shuffle digits rand = np.random.RandomState(321) shuffle = rand.permutation(len(digits)) digits, labels = digits[shuffle], labels[shuffle] digits2 = map(deskew, digits) samples = preprocess_hog(digits2) train_n = int(0.9*len(samples)) cv2.imshow('test set', mosaic(25, digits[train_n:])) digits_train, digits_test = np.split(digits2, [train_n]) samples_train, samples_test = np.split(samples, [train_n]) labels_train, labels_test = np.split(labels, [train_n]) print 'training KNearest...' model = KNearest(k=4) model.train(samples_train, labels_train) vis = evaluate_model(model, digits_test, samples_test, labels_test) cv2.imshow('KNearest test', vis) cv2.waitKey(10000) print 'training SVM...' model = SVM(C=2.67, gamma=5.383) model.train(samples_train, labels_train) vis = evaluate_model(model, digits_test, samples_test, labels_test) cv2.imshow('SVM test', vis)
def _unstack(value, num=None, axis=0, name='unstack'):
    del name
    value = np.array(value)
    return list(
        np.squeeze(x, axis=axis)
        for x in np.split(value, value.shape[axis] if num is None else num, axis))
def main(): parser = argparse.ArgumentParser() parser.add_argument("--data_dir", type=str, default="/home/abc/shhs/polysomnography/edfs/shhs1", help="File path to the PSG files.") parser.add_argument("--ann_dir", type=str, default="/home/abc/shhs/polysomnography/annotations-events-profusion/shhs1", help="File path to the annotation files.") parser.add_argument("--output_dir", type=str, default="/home/abc/output_npz/shhs", help="Directory where to save numpy files outputs.") parser.add_argument("--select_ch", type=str, default="EEG C4-A1", help="The selected channel") args = parser.parse_args() if not os.path.exists(args.output_dir): os.mkdir(args.output_dir) ids = pd.read_csv("selected_shhs1_files.txt", header=None, names='a') ids = ids['a'].values.tolist() edf_fnames = [os.path.join(args.data_dir, i + ".edf") for i in ids] ann_fnames = [os.path.join(args.ann_dir, i + "-profusion.xml") for i in ids] edf_fnames.sort() ann_fnames.sort() edf_fnames = np.asarray(edf_fnames) ann_fnames = np.asarray(ann_fnames) for file_id in range(len(edf_fnames)): if os.path.exists(os.path.join(args.output_dir, edf_fnames[file_id].split('/')[-1])[:-4]+".npz"): continue print(edf_fnames[file_id]) raw = read_raw_edf(edf_fnames[file_id], preload=True, stim_channel=None, verbose=None) sampling_rate = raw.info['sfreq'] ch_type = args.select_ch.split(" ")[0] select_ch = [s for s in raw.info["ch_names"] if ch_type in s][0] raw_ch_df = raw.to_data_frame(scaling_time=sampling_rate)[select_ch] raw_ch_df = raw_ch_df.to_frame() raw_ch_df.set_index(np.arange(len(raw_ch_df))) ################################################### labels = [] # Read annotation and its header t = ET.parse(ann_fnames[file_id]) r = t.getroot() faulty_File = 0 for i in range(len(r[4])): lbl = int(r[4][i].text) if lbl == 4: # make stages N3, N4 same as N3 labels.append(3) elif lbl == 5: # Assign label 4 for REM stage labels.append(4) else: labels.append(lbl) if lbl > 5: # some files may contain labels > 5 BUT not the selected ones. faulty_File = 1 if faulty_File == 1: print( "============================== Faulty file ==================") continue labels = np.asarray(labels) # Remove movement and unknown stages if any raw_ch = raw_ch_df.values print(raw_ch.shape) # Verify that we can split into 30-s epochs if len(raw_ch) % (EPOCH_SEC_SIZE * sampling_rate) != 0: raise Exception("Something wrong") n_epochs = len(raw_ch) / (EPOCH_SEC_SIZE * sampling_rate) # Get epochs and their corresponding labels x = np.asarray(np.split(raw_ch, n_epochs)).astype(np.float32) y = labels.astype(np.int32) print(x.shape) print(y.shape) assert len(x) == len(y) # Select on sleep periods w_edge_mins = 30 nw_idx = np.where(y != 0)[0] start_idx = nw_idx[0] - (w_edge_mins * 2) end_idx = nw_idx[-1] + (w_edge_mins * 2) if start_idx < 0: start_idx = 0 if end_idx >= len(y): end_idx = len(y) - 1 select_idx = np.arange(start_idx, end_idx + 1) print("Data before selection: {}, {}".format(x.shape, y.shape)) x = x[select_idx] y = y[select_idx] print("Data after selection: {}, {}".format(x.shape, y.shape)) # Saving as numpy files filename = os.path.basename(edf_fnames[file_id]).replace(".edf", ".npz") save_dict = { "x": x, "y": y, "fs": sampling_rate } np.savez(os.path.join(args.output_dir, filename), **save_dict) print(" ---------- Done this file ---------")
data = pd.read_csv('full_data.csv', index_col=0) cols = ['apparentTemperature', 'humidity', 'MWh'] df = data[cols] df = (df - df.min()) / (df.max() - df.min()) ##Min-Max Normalization #df = (df - df.mean())/df.std() ##Gaussian normalization inputs = df targets = df['MWh'] #Un-normalized targets #Percentage of samples to use as training data TRAINING_SAMPLE_RATIO = 0.7 num_training_samples = round(len(inputs) * TRAINING_SAMPLE_RATIO) #Splits data samples (training_inputs, test_inputs) = np.split(inputs.values, [num_training_samples]) (training_targets, test_targets) = np.split(targets.values, [num_training_samples]) #Splits timestamps for plotting later (training_t, test_t) = np.split(data['index'].values, [num_training_samples]) #Prepares training data for input to network training_inputs = Variable(torch.from_numpy(training_inputs).float()).cuda() training_targets = Variable(torch.from_numpy(training_targets).float()).cuda() test_inputs = Variable(torch.from_numpy(test_inputs).float()).cuda() test_targets = Variable(torch.from_numpy(test_targets).float()).cuda() # -------------------- Instantiate LSTM Network --------------------- # # Model Params input_dim = training_inputs.shape[1]
def np_split_squeeze(array, axis):
    axis_len = array.shape[axis]
    return [
        np.squeeze(arr, axis=(axis, ))
        for arr in np.split(array, axis_len, axis=axis)
    ]
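# Hedged usage sketch (not from the original source): splitting a (2, 3, 4) array
# along axis 1 with np_split_squeeze above yields three (2, 4) arrays.
import numpy as np

arr = np.arange(24).reshape(2, 3, 4)
parts = np_split_squeeze(arr, axis=1)
print(len(parts), parts[0].shape)  # 3 (2, 4)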
def generate_random_rois(self, image_shape, count, gt_boxes): """ Generates ROI proposals similar to what a region proposal network would generate. :param image_shape: [Height, Width, Depth] :param count: Number of ROIs to generate :param gt_boxes: [N, (y1, x1, y2, x2)] Ground truth boxes in pixels. :return: """ # placeholder rois = np.zeros((count, 4), dtype=np.int32) # Generate random ROIs around GT boxes (90% of count) rois_per_box = int(0.9 * count / gt_boxes.shape[0]) for i in range(gt_boxes.shape[0]): gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[i] h = gt_y2 - gt_y1 w = gt_x2 - gt_x1 # random boundaries r_y1 = max(gt_y1 - h, 0) r_y2 = min(gt_y2 + h, image_shape[0]) r_x1 = max(gt_x1 - w, 0) r_x2 = min(gt_x2 + w, image_shape[1]) # To avoid generating boxes with zero area, we generate double what # we need and filter out the extra. If we get fewer valid boxes # than we need, we loop and try again. while True: y1y2 = np.random.randint(r_y1, r_y2, (rois_per_box * 2, 2)) x1x2 = np.random.randint(r_x1, r_x2, (rois_per_box * 2, 2)) # Filter out zero area boxes threshold = 1 y1y2 = y1y2[np.abs(y1y2[:, 0] - y1y2[:, 1]) >= threshold][:rois_per_box] x1x2 = x1x2[np.abs(x1x2[:, 0] - x1x2[:, 1]) >= threshold][:rois_per_box] if y1y2.shape[0] == rois_per_box and x1x2.shape[ 0] == rois_per_box: break # Sort on axis 1 to ensure x1 <= x2 and y1 <= y2 and then reshape # into x1, y1, x2, y2 order x1, x2 = np.split(np.sort(x1x2, axis=1), 2, axis=1) y1, y2 = np.split(np.sort(y1y2, axis=1), 2, axis=1) box_rois = np.hstack([y1, x1, y2, x2]) rois[rois_per_box * i:rois_per_box * (i + 1)] = box_rois # Generate random ROIs anywhere in the image (10% of count) remaining_count = count - (rois_per_box * gt_boxes.shape[0]) # To avoid generating boxes with zero area, we generate double what # we need and filter out the extra. If we get fewer valid boxes # than we need, we loop and try again. while True: y1y2 = np.random.randint(0, image_shape[0], (remaining_count * 2, 2)) x1x2 = np.random.randint(0, image_shape[1], (remaining_count * 2, 2)) # Filter out zero area boxes threshold = 1 y1y2 = y1y2[np.abs(y1y2[:, 0] - y1y2[:, 1]) >= threshold][:remaining_count] x1x2 = x1x2[np.abs(x1x2[:, 0] - x1x2[:, 1]) >= threshold][:remaining_count] if y1y2.shape[0] == remaining_count and x1x2.shape[ 0] == remaining_count: break # Sort on axis 1 to ensure x1 <= x2 and y1 <= y2 and then reshape # into x1, y1, x2, y2 order x1, x2 = np.split(np.sort(x1x2, axis=1), 2, axis=1) y1, y2 = np.split(np.sort(y1y2, axis=1), 2, axis=1) global_rois = np.hstack([y1, x1, y2, x2]) rois[-remaining_count:] = global_rois return rois pass
mat_size = int(sys.argv[1])

# Initialize the 2 random matrices only if this is rank 0
if rank == 0:
    mat_A = np.random.rand(mat_size, mat_size)
    mat_B = np.random.rand(mat_size, mat_size)
    ans = np.matmul(mat_A, mat_B)
    t_start = MPI.Wtime()
    power = np.log2(size) / 2
    i_len = int(2**(np.ceil(power)))
    j_len = int(2**(np.floor(power)))
    send_list_A = np.split(mat_A, i_len, axis=0)
    send_list_B = np.split(mat_B, j_len, axis=1)
    send_list = []
    for i in range(i_len):
        for j in range(j_len):
            send_list.append([send_list_A[i], send_list_B[j]])
else:
    mat_A = None
    mat_B = None
    send_list = None

mats = comm.scatter(send_list, root=0)
mat_C = matrix_mult(mats[0], mats[1])
res_list = comm.gather(mat_C, root=0)
def get_params_from_mat(matpath): """Get parameter from .mat file into parms(dict)""" def squeeze(vars_): # Matlab save some params with shape (*, 1) # However, we don't need the trailing dimension in TensorFlow. if isinstance(vars_, (list, tuple)): return [np.squeeze(v, 1) for v in vars_] else: return np.squeeze(vars_, 1) netparams = sio.loadmat(matpath)["net"]["params"][0][0] params = dict() for i in range(netparams.size): param = netparams[0][i] name = param["name"][0] value = param["value"] value_size = param["value"].shape[0] match = re.match(r"([a-z]+)([0-9]+)([a-z]+)", name, re.I) if match: items = match.groups() elif name == 'adjust_f': params['detection/weights'] = squeeze(value) continue elif name == 'adjust_b': params['detection/biases'] = squeeze(value) continue else: raise Exception('unrecognized layer params') op, layer, types = items layer = int(layer) if layer in [1, 3]: if op == 'conv': # convolution if types == 'f': params['conv%d/weights' % layer] = value elif types == 'b': value = squeeze(value) params['conv%d/biases' % layer] = value elif op == 'bn': # batch normalization if types == 'x': m, v = squeeze(np.split(value, 2, 1)) params['conv%d/BatchNorm/moving_mean' % layer] = m params['conv%d/BatchNorm/moving_variance' % layer] = np.square(v) elif types == 'm': value = squeeze(value) params['conv%d/BatchNorm/gamma' % layer] = value elif types == 'b': value = squeeze(value) params['conv%d/BatchNorm/beta' % layer] = value else: raise Exception elif layer in [2, 4]: if op == 'conv' and types == 'f': b1, b2 = np.split(value, 2, 3) else: b1, b2 = np.split(value, 2, 0) if op == 'conv': if types == 'f': params['conv%d/b1/weights' % layer] = b1 params['conv%d/b2/weights' % layer] = b2 elif types == 'b': b1, b2 = squeeze(np.split(value, 2, 0)) params['conv%d/b1/biases' % layer] = b1 params['conv%d/b2/biases' % layer] = b2 elif op == 'bn': if types == 'x': m1, v1 = squeeze(np.split(b1, 2, 1)) m2, v2 = squeeze(np.split(b2, 2, 1)) params['conv%d/b1/BatchNorm/moving_mean' % layer] = m1 params['conv%d/b2/BatchNorm/moving_mean' % layer] = m2 params['conv%d/b1/BatchNorm/moving_variance' % layer] = np.square(v1) params['conv%d/b2/BatchNorm/moving_variance' % layer] = np.square(v2) elif types == 'm': params['conv%d/b1/BatchNorm/gamma' % layer] = squeeze(b1) params['conv%d/b2/BatchNorm/gamma' % layer] = squeeze(b2) elif types == 'b': params['conv%d/b1/BatchNorm/beta' % layer] = squeeze(b1) params['conv%d/b2/BatchNorm/beta' % layer] = squeeze(b2) else: raise Exception elif layer in [5]: if op == 'conv' and types == 'f': b1, b2 = np.split(value, 2, 3) else: b1, b2 = squeeze(np.split(value, 2, 0)) assert op == 'conv', 'layer5 contains only convolution' if types == 'f': params['conv%d/b1/weights' % layer] = b1 params['conv%d/b2/weights' % layer] = b2 elif types == 'b': params['conv%d/b1/biases' % layer] = b1 params['conv%d/b2/biases' % layer] = b2 return params
def load_train_data(self, positive_file, negative_file): # #LOAD NEGATIVE # positive file is constant while negative file is changed during train! # if os.path.exists(negative_file + '.npy'): # negative_examples = np.load(negative_file + '.npy') # else: negative_examples = [] # remove \n with open(negative_file, 'r') as f: all_negative = f.read() with open(negative_file, 'w') as f: f.write(all_negative.replace('\n','')) with open(negative_file, 'r') as f: line = f.read(self.seq_len) while len(line) == self.seq_len: tokens = [int(self.charmap[char]) for char in line] assert len(tokens) == self.seq_len negative_examples.append(tokens) line = f.read(self.seq_len) # np.save(negative_file,np.array(negative_examples)) negative_examples = np.array(negative_examples) num_positive_samples = negative_examples.shape[0] #LOAD POSITIVE cache_positive = "%s_seqlen%0d.npy"%(positive_file,self.seq_len) if os.path.exists(cache_positive): positive_examples = np.load(cache_positive) else: positive_examples = [] with open(positive_file, 'r') as f: line = f.read(self.seq_len) while len(line) == self.seq_len: tokens = [int(self.charmap[char]) for char in line] assert len(tokens) == self.seq_len positive_examples.append(tokens) line = f.read(self.seq_len) positive_examples = np.array(positive_examples) np.save(cache_positive.replace('.npy',''),positive_examples) assert positive_examples.shape[1] == self.seq_len #choose only num_positive_samples from them permut = np.random.permutation(positive_examples.shape[0])[:num_positive_samples] positive_examples = positive_examples[permut] # CONCAT negative_examples = np.array(negative_examples) positive_examples = np.array(positive_examples) assert negative_examples.shape == positive_examples.shape self.sentences = np.concatenate((positive_examples,negative_examples),axis=0) # Generate labels positive_labels = [[0, 1]] * positive_examples.shape[0] negative_labels = [[1, 0]] * negative_examples.shape[0] self.labels = np.concatenate([positive_labels, negative_labels], 0) # # Shuffle the data # print "DISC shuffling data..." # shuffle_indices = np.random.permutation(self.sentences.shape[0]) # self.sentences = self.sentences[shuffle_indices] # self.labels = self.labels[shuffle_indices] # Split batches self.num_batch = int(len(self.labels) / self.batch_size) self.sentences = self.sentences[:self.num_batch * self.batch_size] self.labels = self.labels[:self.num_batch * self.batch_size] self.sentences_batches = np.split(self.sentences, self.num_batch, 0) self.labels_batches = np.split(self.labels, self.num_batch, 0) self.pointer = 0 print("done create_batches - [num_batch=%0d]"%self.num_batch)
def train_and_test(train_set, test_set, pipeline, le, srp_dict=None, save_cls=False, out_folder=None, data_aug=True): # do flip based data augmentation if data_aug: if srp_dict is not None: train_set = do_data_augmentation(train_set, srp_dict['res'], srp_dict['nsegs']) # check until which column features are stored i_max = 1 for i, col in enumerate(train_set.columns): if 'feat' in col: i_max = i + 1 # split the dataframe to get features and append the transformed labels data_train = np.split(train_set.to_numpy(), [i_max], axis=1) data_train[1] = le.transform(train_set["Class"]) data_test = np.split(test_set.to_numpy(), [i_max], axis=1) data_test[1] = le.transform(test_set["Class"]) # fit the classifier and predict on the test set pipeline.fit(data_train[0], data_train[1]) test_predicted = pipeline.predict(data_test[0]) accuracy_score = skl.metrics.accuracy_score(data_test[1], test_predicted) # extract confusion matrix and metrics conf_mat = skl.metrics.confusion_matrix(data_test[1], test_predicted, labels=le.transform(le.classes_)) if save_cls: if out_folder is None: save_dir = os.path.join( os.path.dirname(os.path.realpath(__file__)), 'saved_classifier') os.makedirs(save_dir, exist_ok=True) else: save_dir = os.path.join(out_folder, 'saved_classifier/') os.makedirs(save_dir, exist_ok=True) print("Saving Classifier to {} ... ".format(save_dir)) locs_in_train = train_set["Environment"].unique() save_string = "_".join(locs_in_train) pickle.dump( (pipeline), open(os.path.join(*[save_dir, save_string + '_classifier.obj']), "wb")) test_set = test_set.drop_duplicates(subset=["Recording ID"]) test_set["ID"].to_csv( os.path.join(*[save_dir, save_string + '_test_bags.csv']), index=False, header=True) return accuracy_score, conf_mat
def main():
    for i in range(2, 12, 2):
        problem = 2
        if problem == 1:
            traindata = np.loadtxt("Data_OneStepAhead/Lazer/train.txt")
            testdata = np.loadtxt("Data_OneStepAhead/Lazer/test.txt")
            name = "Lazer"
        if problem == 2:
            traindata = np.loadtxt("Data_OneStepAhead/Sunspot/train.txt")
            testdata = np.loadtxt("Data_OneStepAhead/Sunspot/test.txt")
            name = "Sunspot"
        if problem == 3:
            traindata = np.loadtxt("Data_OneStepAhead/Mackey/train.txt")
            testdata = np.loadtxt("Data_OneStepAhead/Mackey/test.txt")
            name = "Mackey"
        if problem == 4:
            traindata = np.loadtxt("Data_OneStepAhead/Lorenz/train.txt")
            testdata = np.loadtxt("Data_OneStepAhead/Lorenz/test.txt")
            name = "Lorenz"
        if problem == 5:
            traindata = np.loadtxt("Data_OneStepAhead/Rossler/train.txt")
            testdata = np.loadtxt("Data_OneStepAhead/Rossler/test.txt")
            name = "Rossler"
        if problem == 6:
            traindata = np.loadtxt("Data_OneStepAhead/Henon/train.txt")
            testdata = np.loadtxt("Data_OneStepAhead/Henon/test.txt")
            name = "Henon"
        if problem == 7:
            traindata = np.loadtxt("Data_OneStepAhead/ACFinance/train.txt")
            testdata = np.loadtxt("Data_OneStepAhead/ACFinance/test.txt")
            name = "ACFinance"

        ###############################
        # THESE ARE THE HYPERPARAMETERS
        ###############################
        hidden = 5
        ip = 4  # number of inputs
        output = 1
        topology = [ip, hidden, output]
        NumSample = 100000

        netw = topology
        print(traindata)
        y_test = testdata[:, netw[0]]
        y_train = traindata[:, netw[0]]

        maxtemp = i
        num_chains = 10
        swap_interval = 100000  # int(swap_ratio * (NumSample / num_chains)); how often neighbours are swapped. If larger than NumSample, swapping is effectively off.
        burn_in = 0.6
        learn_rate = 0.01  # used only when Langevin gradients are enabled; a small value works well
        use_langevin_gradients = False  # False keeps random-walk proposals; Langevin gradients take a bit more compute time

        problemfolder = '/home/rohit/Desktop/PT/PT_TimeSeriesResults_evalmaxtemp_/'  # change this to your directory for results output - produces large datasets
        problemfolder_db = 'PT_TimeSeriesResults_evalmaxtemp_/'  # save main results

        filename = ""
        run_nb = 0
        while os.path.exists(problemfolder + name + '_%s' % (run_nb)):
            run_nb += 1
        if not os.path.exists(problemfolder + name + '_%s' % (run_nb)):
            os.makedirs(problemfolder + name + '_%s' % (run_nb))
            path = (problemfolder + name + '_%s' % (run_nb))

        filename = ""
        run_nb = 0
        while os.path.exists(problemfolder_db + name + '_%s' % (run_nb)):
            run_nb += 1
        if not os.path.exists(problemfolder_db + name + '_%s' % (run_nb)):
            os.makedirs(problemfolder_db + name + '_%s' % (run_nb))
            path_db = (problemfolder_db + name + '_%s' % (run_nb))

        resultingfile = open(path + '/master_result_file.txt', 'a+')
        resultingfile_db = open(path_db + '/master_result_file.txt', 'a+')

        timer = time.time()
        pt = ParallelTempering(use_langevin_gradients, learn_rate, traindata, testdata,
                               topology, num_chains, maxtemp, NumSample, swap_interval, path)

        directories = [
            path + '/predictions/', path + '/posterior', path + '/results',
            path + '/surrogate', path + '/surrogate/learnsurrogate_data',
            path + '/posterior/pos_w', path + '/posterior/pos_likelihood',
            path + '/posterior/surg_likelihood', path + '/posterior/accept_list'
        ]
        for d in directories:
            pt.make_directory((filename) + d)

        pt.initialize_chains(burn_in)
        pos_w, fx_train, fx_test, rmse_train, rmse_test, acc_train, acc_test, likelihood_rep, swap_perc, accept_vec, accept = pt.run_chains()

        list_end = accept_vec.shape[1]
        accept_ratio = accept_vec[:, list_end - 1:list_end] / list_end
        accept_per = np.mean(accept_ratio) * 100
        print(accept_per, ' accept_per')

        timer2 = time.time()
        timetotal = (timer2 - timer) / 60
        print((timetotal), 'min taken')

        # PLOTS
        '''acc_tr = np.mean(acc_train[:])
        acctr_std = np.std(acc_train[:])
        acctr_max = np.amax(acc_train[:])
        acc_tes = np.mean(acc_test[:])
        acctest_std = np.std(acc_test[:])
        acctes_max = np.amax(acc_test[:])'''

        rmse_tr = np.mean(rmse_train[:])
        rmsetr_std = np.std(rmse_train[:])
        rmsetr_max = np.amin(rmse_train[:])  # best (lowest) training RMSE
        rmse_tes = np.mean(rmse_test[:])
        rmsetest_std = np.std(rmse_test[:])
        rmsetes_max = np.amin(rmse_test[:])  # best (lowest) test RMSE

        outres = open(path + '/result.txt', "a+")
        np.savetxt(outres, (use_langevin_gradients, learn_rate, rmse_tr, rmsetr_std,
                            rmsetr_max, rmse_tes, rmsetest_std, rmsetes_max,
                            swap_perc, accept_per, timetotal), fmt='%1.5f')
        print(rmse_tr, rmsetr_max, rmse_tes, rmsetes_max)
        np.savetxt(resultingfile, (NumSample, maxtemp, swap_interval, num_chains, rmse_tr,
                                   rmsetr_std, rmsetr_max, rmse_tes, rmsetest_std,
                                   rmsetes_max), fmt='%1.5f')

        outres_db = open(path_db + '/result.txt', "a+")
        np.savetxt(outres_db, (use_langevin_gradients, learn_rate, rmse_tr, rmsetr_std,
                               rmsetr_max, rmse_tes, rmsetest_std, rmsetes_max,
                               swap_perc, accept_per, timetotal), fmt='%1.5f')
        np.savetxt(resultingfile_db, (NumSample, maxtemp, swap_interval, num_chains, rmse_tr,
                                      rmsetr_std, rmsetr_max, rmse_tes, rmsetest_std,
                                      rmsetes_max), fmt='%1.5f')

        x = np.linspace(0, rmse_train.shape[0], num=rmse_train.shape[0])
        plt.plot(x, rmse_train, label='Train')
        plt.plot(x, rmse_test, label='Test')
        plt.legend(loc='upper right')
        plt.title("Plot of RMSE over time")
        plt.savefig(path + '/acc_samples.png')
        plt.clf()

        plt.plot(rmse_train, label='Train')
        plt.plot(rmse_test, label='Test')
        plt.legend(loc='upper right')
        plt.title("Plot of RMSE over time")
        plt.savefig(path_db + '/acc_samples.png')
        plt.clf()

        '''rmse_train = np.split(rmse_train, num_chains)
        print(rmse_train.T, ' rmse_tr -- ')
        rmse_test = np.asarray(np.split(rmse_test, num_chains))
        plt.plot(rmse_train.T, label='Train')
        plt.plot(rmse_test.T, label='Test')
        plt.legend(loc='upper right')
        plt.title("Accuracy - sampling time")
        plt.savefig(path_db + '/rmse_samples.png')
        plt.clf()'''

        likelihood = likelihood_rep[:, 0]  # just plot proposed likelihood
        likelihood = np.asarray(np.split(likelihood, num_chains))
        # print(likelihood, ' rmse_tr -- ')

        # Plots
        plt.plot(likelihood.T)
        plt.savefig(path + '/likelihood.png')
        plt.clf()

        plt.plot(likelihood.T)
        plt.savefig(path_db + '/likelihood.png')
        plt.clf()

        plt.plot(accept_vec.T)
        plt.savefig(path_db + '/accept.png')
        plt.clf()

        # mpl_fig = plt.figure()
        # ax = mpl_fig.add_subplot(111)
        # ax.boxplot(pos_w)
        # ax.set_xlabel('[W1] [B1] [W2] [B2]')
        # ax.set_ylabel('Posterior')
        # plt.legend(loc='upper right')
        # plt.title("Boxplot of Posterior W (weights and biases)")
        # plt.savefig(path + '/w_pos.png')
        # plt.savefig(path + '/w_pos.svg', format='svg', dpi=600)
        # plt.clf()

        gc.collect()
        outres.close()
        resultingfile.close()
        resultingfile_db.close()
        outres_db.close()
def agg_by_coords(pts, sigs, aggfunc='mean'): df = DataFrame(np.hstack( (pts, sigs))).groupby([0, 1]).agg(aggfunc).reset_index().values return np.split(df, [2], axis=1)
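A small check of agg_by_coords, assuming `DataFrame` is pandas.DataFrame and `np` is numpy as the function implies: rows that share a coordinate pair are collapsed and their signal values aggregated. The toy arrays are made up.

import numpy as np
from pandas import DataFrame

pts = np.array([[0., 0.], [0., 0.], [1., 2.]])   # two rows share the coordinate (0, 0)
sigs = np.array([[1.0], [3.0], [5.0]])

coords, values = agg_by_coords(pts, sigs, aggfunc='mean')
print(coords)   # two unique coordinates remain: (0, 0) and (1, 2)
print(values)   # their signals, with the duplicated (0, 0) averaged to 2.0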
def consecutive(data, step_size=1): """ Identify groups of consecutive integers, split them into separate arrays. """ return np.split(data, np.where(np.diff(data) != step_size)[0] + 1)
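A quick illustration of consecutive: the split points are exactly the positions where the difference between neighbours is not step_size.

import numpy as np

data = np.array([1, 2, 3, 7, 8, 12])
print(consecutive(data))
# [array([1, 2, 3]), array([7, 8]), array([12])]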
    n_hashes=4,
    ff_chunks=10,
    lsh_dropout=0.1,
    weight_tie=True,
    causal=True,
    use_full_attn=False  # set this to True for comparison with full attention
)
model = TrainingWrapper(model)
model.cuda()

# prepare enwik8 data
with gzip.open('./data/enwik8.gz') as file:
    # np.fromstring is deprecated; frombuffer reads the raw bytes (copy so the array is writable)
    X = np.frombuffer(file.read(int(95e6)), dtype=np.uint8).copy()
    trX, vaX = np.split(X, [int(90e6)])
    data_train, data_val = torch.from_numpy(trX), torch.from_numpy(vaX)

class TextSamplerDataset(Dataset):
    def __init__(self, data, seq_len):
        super().__init__()
        self.data = data
        self.seq_len = seq_len

    def __getitem__(self, index):
        rand_start = torch.randint(0, self.data.size(0) - self.seq_len - 1, (1,))
        full_seq = self.data[rand_start:rand_start + self.seq_len + 1].long()
        return full_seq.cuda()
# print 'x = \n', x
# print 'y = \n', y
# le = preprocessing.LabelEncoder()
# le.fit(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])
# print le.classes_
# y = le.transform(y)
# print 'Last Version, y = \n', y

# Load the file: float data, comma-delimited; column 4 is converted separately by the iris_type function
data = np.loadtxt(path, dtype=float, delimiter=',', converters={4: iris_type})
print(data)
# Columns 0-3 form x, column 4 gives y
x, y = np.split(data, (4, ), axis=1)
# Use only the first two features so the result can be visualized
x = x[:, :2]
# print(x)
# print(y)

# x = StandardScaler().fit_transform(x)
# lr = LogisticRegression()   # logistic regression model
# lr.fit(x, y.ravel())        # fit the regression parameters from the data [x, y]

# Equivalent form using a pipeline
lr = Pipeline([('sc', StandardScaler()), ('clf', LogisticRegression())])
lr.fit(x, y.ravel())
def disc_d_state_input_expm( A: np.ndarray, B: np.ndarray, dA: np.ndarray, dB: np.ndarray, dt: float = 1.0, order_hold: int = 0, ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Discretize the state and input matrices, and their derivatives with the matrix exponential Args: A: State matrix B: Input matrix dA: Derivative state matrix dB: Derivative input matrix dt: Sampling time order_hold: zero order hold = 0 or first order hold = 1 Returns: 6-elements tuple containing - Ad: Discrete state matrix - B0d: Discrete input matrix (zero order hold) - B1d: Discrete input matrix (first order hold) - dAd: Derivative discrete state matrix - dB0d: Derivative discrete input matrix (zero order hold) - dB1d: Derivative discrete input matrix (first order hold) """ nj, nx, nu = dB.shape if order_hold == 0: F = np.zeros((nx + nu, nx + nu)) dF = np.zeros((nj, nx + nu, nx + nu)) dFd = np.zeros((nj, nx + nu, nx + nu)) F[:nx, :nx] = A F[:nx, nx:] = B dF[:, :nx, :nx] = dA dF[:, :nx, nx:] = dB for n in range(nj): if dF[n].any() or n == 0: Fd, dFd[n] = expm_frechet(F * dt, dF[n] * dt) Ad, B0d = np.split(Fd[:nx, :], indices_or_sections=[nx], axis=1) dAd, dB0d = np.split(dFd[:, :nx, :], indices_or_sections=[nx], axis=2) B1d = np.zeros((nx, nu)) dB1d = np.zeros((nj, nx, nu)) else: F = np.zeros((nx + 2 * nu, nx + 2 * nu)) dF = np.zeros((nj, nx + 2 * nu, nx + 2 * nu)) dFd = np.zeros((nj, nx + 2 * nu, nx + 2 * nu)) F[:nx, :nx] = A F[:nx, nx : nx + nu] = B F[nx : nx + nu, nx + nu :] = np.eye(nu) dF[:, :nx, :nx] = dA dF[:, :nx, nx : nx + nu] = dB for n in range(nj): if dF[n].any() or n == 0: Fd, dFd[n] = expm_frechet(F * dt, dF[n] * dt) Ad, B0d, B1d = np.split(Fd[:nx, :], indices_or_sections=[nx, nx + nu], axis=1) dAd, dB0d, dB1d = np.split(dFd[:, :nx, :], indices_or_sections=[nx, nx + nu], axis=2) return Ad, B0d, B1d, dAd, dB0d, dB1d
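A minimal usage sketch for disc_d_state_input_expm, assuming its module imports numpy as np and expm_frechet from scipy.linalg; the system matrices and the single sensitivity direction below are made up for illustration.

import numpy as np

nx, nu, nj = 2, 1, 1                       # states, inputs, parameters
A = np.array([[0.0, 1.0], [-2.0, -0.5]])   # toy second-order system
B = np.array([[0.0], [1.0]])
dA = np.zeros((nj, nx, nx))
dA[0, 1, 1] = -1.0                         # sensitivity w.r.t. a damping-like parameter
dB = np.zeros((nj, nx, nu))

Ad, B0d, B1d, dAd, dB0d, dB1d = disc_d_state_input_expm(A, B, dA, dB, dt=0.1, order_hold=0)
print(Ad.shape, B0d.shape, dAd.shape)      # (2, 2) (2, 1) (1, 2, 2)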
def main(_): tf.logging.set_verbosity(tf.logging.INFO) trainer_lib.set_random_seed(FLAGS.random_seed) usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) # Create hparams hparams = trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams, data_dir=os.path.expanduser( FLAGS.data_dir), problem_name=FLAGS.problem) hparams.force_full_predict = True hparams.scheduled_sampling_k = -1 # Params num_agents = 1 # TODO(mbz): fix the code for more agents num_steps = FLAGS.num_steps if hasattr(hparams.problem, "num_actions"): num_actions = hparams.problem.num_actions else: num_actions = None frame_shape = hparams.problem.frame_shape resized_frame = hparams.preprocess_resize_frames is not None if resized_frame: frame_shape = hparams.preprocess_resize_frames frame_shape += [hparams.problem.num_channels] dataset = registry.problem(FLAGS.problem).dataset( tf_estimator.ModeKeys.TRAIN, shuffle_files=True, data_dir=os.path.expanduser(FLAGS.data_dir), hparams=hparams) dataset = dataset.batch(num_agents, drop_remainder=True) data = dataset.make_one_shot_iterator().get_next() # Setup input placeholders input_size = [num_agents, hparams.video_num_input_frames] if num_actions is None: placeholders = { "inputs": tf.placeholder(tf.float32, input_size + frame_shape) } else: placeholders = { "inputs": tf.placeholder(tf.float32, input_size + frame_shape), "input_action": tf.placeholder(tf.int64, input_size + [1]), "input_reward": tf.placeholder(tf.int64, input_size + [1]), "reset_internal_states": tf.placeholder(tf.float32, []), } # Create model. model_cls = registry.model(FLAGS.model) model = model_cls(hparams, tf_estimator.ModeKeys.PREDICT) prediction_ops = model.infer(placeholders) states_q = Queue(maxsize=hparams.video_num_input_frames) actions_q = Queue(maxsize=hparams.video_num_input_frames) rewards_q = Queue(maxsize=hparams.video_num_input_frames) if num_actions is not None: all_qs = [states_q, actions_q, rewards_q] else: all_qs = [states_q] writer = common_video.WholeVideoWriter(fps=FLAGS.fps, output_path=FLAGS.output_gif) saver = tf.train.Saver(tf.trainable_variables()) with tf.train.SingularMonitoredSession() as sess: # Load latest checkpoint ckpt = tf.train.get_checkpoint_state( FLAGS.output_dir).model_checkpoint_path saver.restore(sess.raw_session(), ckpt) # get init frames from the dataset data_np = sess.run(data) frames = np.split(data_np["inputs"], hparams.video_num_input_frames, 1) for frame in frames: frame = np.squeeze(frame, 1) states_q.put(frame) writer.write(frame[0].astype(np.uint8)) if num_actions is not None: actions = np.split(data_np["input_action"], hparams.video_num_input_frames, 1) for action in actions: actions_q.put(np.squeeze(action, 1)) rewards = np.split(data_np["input_reward"], hparams.video_num_input_frames, 1) for reward in rewards: rewards_q.put(np.squeeze(reward, 1)) for step in range(num_steps): print(">>>>>>> ", step) if num_actions is not None: random_actions = np.random.randint(num_actions - 1) random_actions = np.expand_dims(random_actions, 0) random_actions = np.tile(random_actions, (num_agents, 1)) # Shape inputs and targets inputs, input_action, input_reward = (np.stack(list(q.queue), axis=1) for q in all_qs) else: assert len(all_qs) == 1 q = all_qs[0] elems = list(q.queue) # Need to adjust shapes sometimes. 
for i, e in enumerate(elems): if len(e.shape) < 4: elems[i] = np.expand_dims(e, axis=0) inputs = np.stack(elems, axis=1) # Predict next frames if num_actions is None: feed = {placeholders["inputs"]: inputs} else: feed = { placeholders["inputs"]: inputs, placeholders["input_action"]: input_action, placeholders["input_reward"]: input_reward, placeholders["reset_internal_states"]: float(step == 0), } predictions = sess.run(prediction_ops, feed_dict=feed) if num_actions is None: predicted_states = predictions[:, 0] else: predicted_states = predictions["targets"][:, 0] predicted_reward = predictions["target_reward"][:, 0] # Update queues if num_actions is None: new_data = (predicted_states) else: new_data = (predicted_states, random_actions, predicted_reward) for q, d in zip(all_qs, new_data): q.get() q.put(d.copy()) writer.write(np.round(predicted_states[0]).astype(np.uint8)) writer.finish_to_disk()
def resample(*arrays, **options): """Resample arrays or sparse matrices in a consistent way The default strategy implements one step of the bootstrapping procedure. Parameters ---------- *arrays : sequence of indexable data-structures Indexable data-structures can be arrays, lists, dataframes or scipy sparse matrices with consistent first dimension. Other Parameters ---------------- replace : boolean, True by default Implements resampling with replacement. If False, this will implement (sliced) random permutations. n_samples : int, None by default Number of samples to generate. If left to None this is automatically set to the first dimension of the arrays. If replace is False it should not be larger than the length of arrays. random_state : int, RandomState instance or None, optional (default=None) Determines random number generation for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term:`Glossary <random_state>`. stratify : array-like or None (default=None) If not None, data is split in a stratified fashion, using this as the class labels. Returns ------- resampled_arrays : sequence of indexable data-structures Sequence of resampled copies of the collections. The original arrays are not impacted. Examples -------- It is possible to mix sparse and dense arrays in the same run:: >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]]) >>> y = np.array([0, 1, 2]) >>> from scipy.sparse import coo_matrix >>> X_sparse = coo_matrix(X) >>> from sklearn.utils import resample >>> X, X_sparse, y = resample(X, X_sparse, y, random_state=0) >>> X array([[1., 0.], [2., 1.], [1., 0.]]) >>> X_sparse <3x2 sparse matrix of type '<... 'numpy.float64'>' with 4 stored elements in Compressed Sparse Row format> >>> X_sparse.toarray() array([[1., 0.], [2., 1.], [1., 0.]]) >>> y array([0, 1, 0]) >>> resample(y, n_samples=2, random_state=0) array([0, 1]) Example using stratification:: >>> y = [0, 0, 1, 1, 1, 1, 1, 1, 1] >>> resample(y, n_samples=5, replace=False, stratify=y, ... 
random_state=0) [1, 1, 1, 0, 1] See also -------- :func:`sklearn.utils.shuffle` """ random_state = check_random_state(options.pop('random_state', None)) replace = options.pop('replace', True) max_n_samples = options.pop('n_samples', None) stratify = options.pop('stratify', None) if options: raise ValueError("Unexpected kw arguments: %r" % options.keys()) if len(arrays) == 0: return None first = arrays[0] n_samples = first.shape[0] if hasattr(first, 'shape') else len(first) if max_n_samples is None: max_n_samples = n_samples elif (max_n_samples > n_samples) and (not replace): raise ValueError("Cannot sample %d out of arrays with dim %d " "when replace is False" % (max_n_samples, n_samples)) check_consistent_length(*arrays) if stratify is None: if replace: indices = random_state.randint(0, n_samples, size=(max_n_samples,)) else: indices = np.arange(n_samples) random_state.shuffle(indices) indices = indices[:max_n_samples] else: # Code adapted from StratifiedShuffleSplit() y = check_array(stratify, ensure_2d=False, dtype=None) if y.ndim == 2: # for multi-label y, map each distinct row to a string repr # using join because str(row) uses an ellipsis if len(row) > 1000 y = np.array([' '.join(row.astype('str')) for row in y]) classes, y_indices = np.unique(y, return_inverse=True) n_classes = classes.shape[0] class_counts = np.bincount(y_indices) # Find the sorted list of instances for each class: # (np.unique above performs a sort, so code is O(n logn) already) class_indices = np.split(np.argsort(y_indices, kind='mergesort'), np.cumsum(class_counts)[:-1]) n_i = _approximate_mode(class_counts, max_n_samples, random_state) indices = [] for i in range(n_classes): indices_i = random_state.choice(class_indices[i], n_i[i], replace=replace) indices.extend(indices_i) indices = random_state.permutation(indices) # convert sparse matrices to CSR for row-based indexing arrays = [a.tocsr() if issparse(a) else a for a in arrays] resampled_arrays = [_safe_indexing(a, indices) for a in arrays] if len(resampled_arrays) == 1: # syntactic sugar for the unit argument case return resampled_arrays[0] else: return resampled_arrays
def split_image_into_128(img): arr = np.array(np.split(img, 16)) arr = np.array(np.split(arr, 16, -1)) arr = arr.reshape((-1, 128,128))[..., None] return arr
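split_image_into_128 assumes a 2048 x 2048 single-channel image, i.e. a 16 x 16 grid of 128-pixel tiles; a quick shape check:

import numpy as np

img = np.zeros((2048, 2048))
tiles = split_image_into_128(img)
print(tiles.shape)   # (256, 128, 128, 1)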
def main(config_path, sigma_in): # hyper-parameter with open(config_path, 'r') as f: cfg = yaml.safe_load(f) cfg['MODEL']['ALPHA'] = 0.075 cfg['DATALOADER']['TIME_LENGTH'] = 200 cfg['DATALOADER']['SIGNAL_LENGTH'] = 50 cfg['DATALOADER']['VARIABLE_DELAY'] = 15 model_name = os.path.splitext(os.path.basename(config_path))[0] os.makedirs('../results/', exist_ok=True) save_path = f'../results/neural_norm/' os.makedirs(save_path, exist_ok=True) # print('sigma_neu accuracy') # performanceは1つの学習済みモデルに対してsigma_neu^testを0から0.15まで変えてそれぞれの正解率を記録する。 results_norm = [] # モデルのロード torch.manual_seed(1) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') cfg['MODEL']['SIGMA_NEU'] = 0 model = RecurrentNeuralNetwork(n_in=1, n_out=2, n_hid=cfg['MODEL']['SIZE'], device=device, alpha_time_scale=cfg['MODEL']['ALPHA'], beta_time_scale=cfg['MODEL']['BETA'], activation=cfg['MODEL']['ACTIVATION'], sigma_neu=cfg['MODEL']['SIGMA_NEU'], sigma_syn=cfg['MODEL']['SIGMA_SYN'], use_bias=cfg['MODEL']['USE_BIAS'], anti_hebbian=cfg['MODEL']['ANTI_HEBB']).to(device) model_path = f'../trained_model/freq_schedule/{model_name}/epoch_{cfg["TRAIN"]["NUM_EPOCH"]}.pth' model.load_state_dict(torch.load(model_path, map_location=device)) model.eval() correct = 0 num_data = 0 # print('delta correct_rate') for delta_idx in range(50): while True: delta = np.random.rand() * 8 - 4 if abs(delta) >= 1: break N = 500 output_list = np.zeros(N) input_signal = romo_signal(delta, N, cfg['DATALOADER']['TIME_LENGTH'], cfg['DATALOADER']['SIGNAL_LENGTH'], sigma_in, cfg['MODEL']['ALPHA']) input_signal_split = np.split(input_signal, 10) for i in range(10): hidden = torch.zeros(50, model.n_hid) hidden = hidden.to(device) inputs = torch.from_numpy(input_signal_split[i]).float() inputs = inputs.to(device) hidden_list, outputs, _, _ = model(inputs, hidden) outputs_np = outputs.cpu().detach().numpy() output_list[i * 50: (i + 1) * 50] = np.argmax(outputs_np[:, -1], axis=1) results_norm.append(np.linalg.norm(hidden_list.cpu().detach().numpy()[:, :, :])) num_data += 500 if delta > 0: ans = 1 else: ans = 0 correct += (output_list == ans).sum() if delta_idx % 10 == 0: print(f'{np.mean(results_norm):.4f}') # print(f'{delta:.3f}', (output_list == ans).sum() / 200) # print(cfg['MODEL']['SIGMA_NEU'], correct / num_data) print(np.mean(results_norm), np.std(results_norm)) np.savetxt(os.path.join(save_path, f'{model_name}.txt'), np.array([np.mean(results_norm), np.std(results_norm)]))
visbiasinc = np.zeros([W.shape[0], K]) grad = np.zeros( W.shape) # gradient tracker for learning for epoch in range(1, epochs + 1): # in each epoch, we'll visit all users in a random order visitingOrder = np.array(trStats["u_users"]) np.random.shuffle(visitingOrder) # | Extension | Adaptive Learning Rate adapativeLearningRate = alpha / epoch**2 # | Extension | Mini Batch # numBatches = np.ceil(visitingOrder.shape[0]/B) batches = np.split(visitingOrder, B) for batch in batches: prevGrad = grad grad = np.zeros(W.shape) for user in batch: # get the ratings of that user ratingsForUser = lib.getRatingsForUser( user, training) # build the visible input v = rbm.getV(ratingsForUser) # get the weights associated to movies the user has seen
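In the mini-batch extension above, np.split(visitingOrder, B) only works when the number of users divides evenly by B (the commented-out numBatches hints at the more general case). np.array_split tolerates a remainder, as this standalone snippet shows:

import numpy as np

visitingOrder = np.arange(10)
# np.split(visitingOrder, 3) would raise ValueError: 10 is not divisible by 3
batches = np.array_split(visitingOrder, 3)   # nearly equal batches, no divisibility requirement
print([b.tolist() for b in batches])         # [[0, 1, 2, 3], [4, 5, 6], [7, 8, 9]]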
import numpy as np data = np.random.sample((8, 8)) a1, a2 = np.split(data, 2, axis=0)
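The same call splits column-wise with axis=1, and passing a list of indices instead of a count splits at explicit positions:

import numpy as np

data = np.random.sample((8, 8))
left, right = np.split(data, 2, axis=1)    # two (8, 4) halves, column-wise
head, tail = np.split(data, [3], axis=0)   # split at row 3: shapes (3, 8) and (5, 8)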
def tile_with_columns(cls, op): in_df = op.inputs[0] out_df = op.outputs[0] col_names = op.col_names if not isinstance(col_names, list): column_index = calc_columns_index(col_names, in_df) out_chunks = [] dtype = in_df.dtypes[col_names] for i in range(in_df.chunk_shape[0]): c = in_df.cix[(i, column_index)] op = DataFrameIndex(col_names=col_names) out_chunks.append( op.new_chunk([c], shape=(c.shape[0], ), index=(i, ), dtype=dtype, index_value=c.index_value, name=col_names)) new_op = op.copy() return new_op.new_seriess(op.inputs, shape=out_df.shape, dtype=out_df.dtype, index_value=out_df.index_value, name=out_df.name, nsplits=(in_df.nsplits[0], ), chunks=out_chunks) else: # combine columns into one chunk and keep the columns order at the same time. # When chunk columns are ['c1', 'c2', 'c3'], ['c4', 'c5'], # selected columns are ['c2', 'c3', 'c4', 'c2'], `column_splits` will be # [(['c2', 'c3'], 0), ('c4', 1), ('c2', 0)]. selected_index = [ calc_columns_index(col, in_df) for col in col_names ] condition = np.where(np.diff(selected_index))[0] + 1 column_splits = np.split(col_names, condition) column_indexes = np.split(selected_index, condition) out_chunks = [[] for _ in range(in_df.chunk_shape[0])] column_nsplits = [] for i, (columns, column_idx) in enumerate(zip(column_splits, column_indexes)): dtypes = in_df.dtypes[columns] column_nsplits.append(len(columns)) for j in range(in_df.chunk_shape[0]): c = in_df.cix[(j, column_idx[0])] index_op = DataFrameIndex(col_names=list(columns), object_type=ObjectType.dataframe) out_chunk = index_op.new_chunk( [c], shape=(c.shape[0], len(columns)), index=(j, i), dtypes=dtypes, index_value=c.index_value, columns_value=parse_index(pd.Index(columns), store_data=True)) out_chunks[j].append(out_chunk) out_chunks = [item for l in out_chunks for item in l] new_op = op.copy() nsplits = (in_df.nsplits[0], tuple(column_nsplits)) return new_op.new_dataframes(op.inputs, shape=out_df.shape, dtypes=out_df.dtypes, index_value=out_df.index_value, columns_value=out_df.columns, chunks=out_chunks, nsplits=nsplits)
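The run-grouping described in the comment above can be reproduced in isolation; col_names and selected_index below are the example values from that comment.

import numpy as np

col_names = ['c2', 'c3', 'c4', 'c2']
selected_index = [0, 0, 1, 0]                          # source chunk of each selected column
condition = np.where(np.diff(selected_index))[0] + 1   # positions where the chunk index changes
column_splits = np.split(col_names, condition)
column_indexes = np.split(selected_index, condition)
print([c.tolist() for c in column_splits])             # [['c2', 'c3'], ['c4'], ['c2']]
print([i.tolist() for i in column_indexes])            # [[0, 0], [1], [0]]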
def read(self): fname_template = osp.join(self.path, "{}_{{}}.txt".format(self.name)) available = [ f.split(os.sep)[-1][len(self.name) + 1:-4] # Remove leading name for f in glob.glob(fname_template.format("*")) ] # Batch index node_batch_index = ( io.load_txt(fname_template.format("graph_indicator")).astype(int) - 1) n_nodes = np.bincount(node_batch_index) n_nodes_cum = np.concatenate(([0], np.cumsum(n_nodes)[:-1])) # Read edge lists edges = io.load_txt(fname_template.format("A"), delimiter=",").astype(int) - 1 # Remove duplicates and self-loops from edges _, mask = np.unique(edges, axis=0, return_index=True) mask = mask[edges[mask, 0] != edges[mask, 1]] edges = edges[mask] # Split edges into separate edge lists edge_batch_idx = node_batch_index[edges[:, 0]] n_edges = np.bincount(edge_batch_idx) n_edges_cum = np.cumsum(n_edges[:-1]) el_list = np.split(edges - n_nodes_cum[edge_batch_idx, None], n_edges_cum) # Node features x_list = [] if "node_attributes" in available: x_attr = io.load_txt(fname_template.format("node_attributes"), delimiter=",") if x_attr.ndim == 1: x_attr = x_attr[:, None] x_list.append(x_attr) if "node_labels" in available: x_labs = io.load_txt(fname_template.format("node_labels")) if x_labs.ndim == 1: x_labs = x_labs[:, None] x_labs = np.concatenate( [_normalize(xl_[:, None], "ohe") for xl_ in x_labs.T], -1) x_list.append(x_labs) if len(x_list) > 0: x_list = np.concatenate(x_list, -1) x_list = np.split(x_list, n_nodes_cum[1:]) else: print("WARNING: this dataset doesn't have node attributes." "Consider creating manual features before using it with a " "Loader.") x_list = [None] * len(n_nodes) # Edge features e_list = [] if "edge_attributes" in available: e_attr = io.load_txt(fname_template.format("edge_attributes")) if e_attr.ndim == 1: e_attr = e_attr[:, None] e_attr = e_attr[mask] e_list.append(e_attr) if "edge_labels" in available: e_labs = io.load_txt(fname_template.format("edge_labels")) if e_labs.ndim == 1: e_labs = e_labs[:, None] e_labs = e_labs[mask] e_labs = np.concatenate( [_normalize(el_[:, None], "ohe") for el_ in e_labs.T], -1) e_list.append(e_labs) if len(e_list) > 0: e_available = True e_list = np.concatenate(e_list, -1) e_list = np.split(e_list, n_edges_cum) else: e_available = False e_list = [None] * len(n_nodes) # Create sparse adjacency matrices and re-sort edge attributes in lexicographic # order a_e_list = [ sparse.edge_index_to_matrix( edge_index=el, edge_weight=np.ones(el.shape[0]), edge_features=e, shape=(n, n), ) for el, e, n in zip(el_list, e_list, n_nodes) ] if e_available: a_list, e_list = list(zip(*a_e_list)) else: a_list = a_e_list # Labels if "graph_attributes" in available: labels = io.load_txt(fname_template.format("graph_attributes")) elif "graph_labels" in available: labels = io.load_txt(fname_template.format("graph_labels")) labels = _normalize(labels[:, None], "ohe") else: raise ValueError("No labels available for dataset {}".format( self.name)) # Convert to Graph print("Successfully loaded {}.".format(self.name)) return [ Graph(x=x, a=a, e=e, y=y) for x, a, e, y in zip(x_list, a_list, e_list, labels) ]
def split(self,
          dataset,
          seed=None,
          frac_train=.8,
          frac_valid=.1,
          frac_test=.1,
          log_every_n=None):
    """
    Splits compounds into train/validation/test using stratified sampling.

    Parameters
    ----------
    dataset: dc.data.Dataset object
      Dataset.
    seed: int (Optional, Default None)
      Random seed.
    frac_train: float (Optional, Default .8)
      Fraction of dataset put into training data.
    frac_valid: float (Optional, Default .1)
      Fraction of dataset put into validation data.
    frac_test: float (Optional, Default .1)
      Fraction of dataset put into test data.
    log_every_n: int (Optional, Default None)
      Log every n examples (not currently used).

    Returns
    -------
    retval: Tuple
      Tuple containing train indices, valid indices, and test indices
    """
    # JSG Assert that split fractions can be written as proper fractions over 10.
    # This can be generalized in the future with some common denominator determination.
    # This will work for 80/20 train/test or 80/10/10 train/valid/test (most use cases).
    np.testing.assert_equal(frac_train + frac_valid + frac_test, 1.)
    np.testing.assert_equal(10 * frac_train + 10 * frac_valid + 10 * frac_test, 10.)

    if seed is not None:
        np.random.seed(seed)

    y_s = dataset.y[:, self.task_number]
    sortidx = np.argsort(y_s)

    split_cd = 10
    train_cutoff = int(frac_train * split_cd)
    valid_cutoff = int(frac_valid * split_cd) + train_cutoff
    test_cutoff = int(frac_test * split_cd) + valid_cutoff

    train_idx = np.array([])
    valid_idx = np.array([])
    test_idx = np.array([])

    while sortidx.shape[0] >= split_cd:
        sortidx_split, sortidx = np.split(sortidx, [split_cd])
        shuffled = np.random.permutation(range(split_cd))
        train_idx = np.hstack([train_idx, sortidx_split[shuffled[:train_cutoff]]])
        valid_idx = np.hstack([valid_idx, sortidx_split[shuffled[train_cutoff:valid_cutoff]]])
        test_idx = np.hstack([test_idx, sortidx_split[shuffled[valid_cutoff:]]])

    # Append remaining examples to train
    if sortidx.shape[0] > 0:
        train_idx = np.hstack([train_idx, sortidx])  # assign the result; otherwise the leftovers are silently dropped

    return (train_idx, valid_idx, test_idx)
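The core of the sampler above can be seen on standalone, made-up data: sort by the response, peel off blocks of ten with np.split, and assign 8/1/1 of each block at random to train/valid/test.

import numpy as np

y = np.random.rand(30)                        # continuous response values
sortidx = np.argsort(y)                       # indices ordered by response
train_idx, valid_idx, test_idx = [], [], []
while sortidx.shape[0] >= 10:
    block, sortidx = np.split(sortidx, [10])  # peel off one block of ten sorted indices
    shuffled = np.random.permutation(10)
    train_idx.extend(block[shuffled[:8]])     # 8/1/1 split inside every block
    valid_idx.extend(block[shuffled[8:9]])
    test_idx.extend(block[shuffled[9:]])
print(len(train_idx), len(valid_idx), len(test_idx))   # 24 3 3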