def slice_sample_all_components_optimized(rv_mdl, glob_mdl, data, prior, rows = None, cols = None , width = None):
    """Slice-sample the components of a 2-D random variable one at a time.

    Args:
        rv_mdl: model wrapper; ``rv_mdl.get()`` returns the 2-D array that is
            sampled (it is accessed through ``.flat`` and ``.shape``).
        glob_mdl: global model providing ``log_likelihood(data)`` and
            ``llike_function(data, rv_mdl, (row, col))``.
        data: observations forwarded unchanged to ``glob_mdl``.
        prior: either a single prior (used for every component) or a sequence
            holding one prior, or one prior per component.
        rows: row indices to visit; a random permutation of all rows when None.
        cols: column indices to visit; a random permutation of all columns
            when None.
        width: forwarded to ``slice_sample_component``.

    Raises:
        IndexError: if ``prior`` is a sequence whose length is neither 1 nor
            the number of components.
    """
    # ``collections.Sequence`` was removed in Python 3.10; fall back for Py2.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    rv = rv_mdl.get()
    n_components = len(rv.flat)

    if isinstance(prior, Sequence):
        if len(prior) == 1:
            prior_list = [prior[0]] * n_components
        elif len(prior) == n_components:
            prior_list = prior
        else:
            raise IndexError("Expected either one prior for all rv or one for each")
    else:
        prior_list = [prior] * n_components

    cur_ll = glob_mdl.log_likelihood(data)

    # ``is None`` rather than ``== None``: callers may pass index arrays, for
    # which ``== None`` is an element-wise comparison with no single truth value.
    if rows is None:
        rows = npr.permutation(rv.shape[0])
    if cols is None:
        cols = npr.permutation(rv.shape[1])

    for row in rows:
        for col in cols:
            log_likelihood = glob_mdl.llike_function(data, rv_mdl, (row, col))
            idx = np.ravel_multi_index((row, col), rv.shape)
            # The value returned by the component sampler is carried forward as
            # the running log-likelihood for the next component.
            cur_ll = slice_sample_component(rv.flat, idx, log_likelihood, prior_list[idx], cur_ll, width)
def printFiltered(clustdict, seqdict, classdict, newfasta, newclass):
    """Write a de-duplicated FASTA file and matching class file.

    For every cluster with more than one member, at most one randomly chosen
    member labelled "0" and one labelled "1" is kept; single-member clusters
    are kept as they are.

    Args:
        clustdict: mapping whose values are lists of sequence IDs (clusters).
        seqdict: mapping from sequence ID to sequence string.
        classdict: mapping from sequence ID to class label ("0" or "1").
        newfasta: path of the FASTA file to write.
        newclass: path of the tab-separated ID/class file to write.
    """
    # NOTE(review): the files are opened in binary mode but receive ``str``
    # data, which only works on Python 2 - confirm before porting.
    # ``with`` guarantees both handles are closed even if a lookup fails.
    with open(newfasta, "wb") as outfasta, open(newclass, "wb") as outclass:
        # iterate over clusters
        for clist in clustdict.values():
            # if cluster larger than 1, check labels
            if len(clist) > 1:
                keepIDs = []
                pclass1 = [p for p in clist if classdict[p] == "0"]
                pclass2 = [p for p in clist if classdict[p] == "1"]
                # choose one protein from each class for cluster
                if pclass1:
                    keepIDs.append(random.permutation(pclass1)[0])
                if pclass2:
                    keepIDs.append(random.permutation(pclass2)[0])
            else:
                keepIDs = clist
            # print to new FASTA/.class file
            for ID in keepIDs:
                outfasta.write(">%s\n%s\n" % (ID, seqdict[ID]))
                outclass.write("%s\t%s\n" % (ID, classdict[ID]))
def phaseshuffle(input_signal):
    """phaseshuffle(input_signal)

    Shuffle the phases of the component frequencies of a real signal among
    each other, while preserving the phase of the DC component and of the
    Nyquist element (when there is one). The magnitudes are left untouched.

    Input is a matrix of channels x timesteps; the output has the same shape.
    """
    from numpy.fft import rfft, irfft
    from numpy import angle, zeros, concatenate, exp
    from numpy.random import permutation

    ## Fourier Transform to Get Component Frequencies' Phases and Magnitudes
    length_of_signal = input_signal.shape[1]
    print("Calculating component frequencies and their phases")
    y = rfft(input_signal, axis=1)
    magnitudes = abs(y)
    phases = angle(y)

    ## Shuffle Phases, Preserving DC component and Nyquist element (if present)
    number_of_channels, N = y.shape
    randomized_phases = zeros(y.shape)
    print("Randomizing")
    # rfft yields a Nyquist bin (the last one) only for an even number of time
    # steps, so the decision is made on the signal length, not on the bin count.
    has_nyquist = not (length_of_signal & 1)
    for j in range(number_of_channels):
        if has_nyquist:
            # Retain the DC and Nyquist components and shuffle the rest. This
            # keeps the new spectrum consistent with a real signal.
            order = concatenate(([0], permutation(N - 2) + 1, [N - 1]))
        else:
            # Retain the DC component and shuffle the remaining components.
            order = concatenate(([0], permutation(N - 1) + 1))
        randomized_phases[j] = phases[j, order]

    ## Construct New Signal
    print("Constructing new signal")
    y1 = magnitudes * exp(1j * randomized_phases)
    # irfft returns a real array of the requested length.
    output_signal = irfft(y1, n=length_of_signal, axis=1)
    return output_signal
def subsample(sample_num, labels = None, **kwargs):
    """Pick ``sample_num`` rows from each keyword array.

    Without ``labels`` every keyword array is indexed with its own random
    permutation of ``range(sample_num)``. With ``labels`` the same is done
    separately for every unique label value (on the rows carrying that label)
    and the per-label pieces are concatenated; the correspondingly sampled
    labels are appended as the last element of the result.

    Returns a tuple with one entry per keyword argument (plus the labels).

    NOTE(review): each array is indexed with an independent permutation and
    only positions ``0 .. sample_num-1`` are ever drawn - confirm this is the
    intended sampling scheme.
    """
    if labels is None:
        # just sample some points
        return tuple(kwargs[name][permutation(sample_num)] for name in kwargs)

    def _per_label(values):
        # return amount of sample from each label
        merged = None
        for tag in np.unique(labels):
            order = permutation(sample_num)
            chunk = values[labels == tag][order]
            if merged is None:
                merged = chunk
            else:
                merged = np.concatenate((merged, chunk), axis = 0)
        return merged

    picked = tuple(_per_label(kwargs[name]) for name in kwargs)
    return picked + (_per_label(labels),)
def hariri(top_stimuli, targets, distractors, forbid_identical_targets=True, suffix_characters=0):
    """Build a stimulus sequence table with a top face and two choices.

    Args:
        top_stimuli: sequence of top stimuli, one per trial.
        targets: sequence of target stimuli, paired with ``top_stimuli`` by
            position.
        distractors: sequence of distractor stimuli, paired by position.
        forbid_identical_targets: when True, targets (and, with a suffix,
            distractors) are reshuffled until no collision remains.
        suffix_characters: number of trailing characters to ignore when
            comparing stimulus names; 0 disables the suffix-aware check.

    Returns:
        pandas.DataFrame with one row per top stimulus; 'left face',
        'right face' and 'correct answer' are filled in, the remaining
        columns are left empty.
    """
    # NOTE(review): this block was recovered from a whitespace-collapsed
    # source; the two list reversals below are assumed to run unconditionally.

    # generate balanced randomness for left/right target placing
    # (floor division: the repeat count must be an integer on Python 3)
    randomness = list(permutation(np.tile([[True], [False]], (len(top_stimuli) // 2, 1))))

    if forbid_identical_targets:
        if suffix_characters:
            def _stem(name):
                return name[:-suffix_characters]

            while True:
                targets = list(permutation(targets))
                distractors = list(permutation(distractors))
                collision = any(
                    _stem(targets[i]) == _stem(top_stimuli[i])
                    or _stem(distractors[i]) == _stem(top_stimuli[i])
                    or _stem(targets[i]) == _stem(distractors[i])
                    for i in range(len(targets))
                )
                if not collision:
                    break
        else:
            # avoid collisions when the last stimulus in the top_stimuli and target lists are the same
            while targets[0] == top_stimuli[-1]:
                targets = list(permutation(targets))

    # Reverse so that ``pop()`` hands the items out in their original order.
    distractors = list(distractors)[::-1]
    targets = list(targets)[::-1]

    columns = ['emotion', 'emotion intensity', 'scrambling', 'gender',
               'top face', 'left face', 'right face', 'correct answer']
    stimseq = pd.DataFrame(index=np.arange(len(top_stimuli)), columns=columns)
    stimseq['top face'] = top_stimuli

    for pos in range(len(stimseq)):
        is_right = randomness.pop()
        if is_right:
            target_side, distractor_side = 'right', 'left'
        else:
            target_side, distractor_side = 'left', 'right'
        # ``.loc[row, column]`` replaces the removed ``.ix`` chained assignment,
        # which does not reliably write back into the frame.
        stimseq.loc[pos, distractor_side + ' face'] = distractors.pop()
        stimseq.loc[pos, target_side + ' face'] = targets.pop()
        stimseq.loc[pos, 'correct answer'] = target_side
    return stimseq
def test_sample_posterior(self):
    """Posterior samples should look like prior samples and reconstruct the data."""
    isa = ISA(2, 3, num_scales=10)
    isa.A = asarray([[1., 0., 1.], [0., 1., 1.]])
    isa.initialize()

    params = isa.default_parameters()
    params['gibbs']['verbosity'] = 0
    params['gibbs']['num_iter'] = 100

    def _flat_shuffled(states):
        # flatten and put the entries into random order
        flat = states.flatten()
        return flat[permutation(flat.size)]

    posterior = isa.sample_posterior(isa.sample(1000), params)
    prior = isa.sample_prior(posterior.shape[1])

    # on average, posterior samples should be distributed like prior samples
    p_value = ks_2samp(_flat_shuffled(posterior), _flat_shuffled(prior))[1]
    self.assertGreater(p_value, 0.0001)

    # reconstruction should be perfect
    samples = isa.sample(100)
    states = isa.sample_posterior(samples, params)
    residual = square(dot(isa.A, states) - samples).flatten()
    self.assertLess(sum(residual), 1e-10)
def TwoSampleTest(self,sample1,sample2,numShuffles=1000,method='vanilla',blockSize=20):
    """
    Compute the p-value associated to the MMD between two samples

    method determines the null approximation procedure:
    ----'vanilla': standard permutation test
    ----'block': block permutation test
    ----'wild': wild bootstrap
    ----'wild-center': wild bootstrap with empirical degeneration
    ----'wild2': wild bootstrap with separate, centred processes per sample

    Returns the fraction of null samples that exceed the observed MMD.

    Raises ValueError for an unknown method, for 'wild'/'wild-center' with
    unequal sample sizes, and for 'block' when the merged sample length is not
    a multiple of blockSize.
    """
    n1=shape(sample1)[0]
    n2=shape(sample2)[0]
    merged = concatenate( [sample1, sample2], axis=0 )
    merged_len=shape(merged)[0]
    # floor division: the block count is used as an array dimension
    numBlocks = merged_len // blockSize
    K=self.kernel(merged)

    def _mmd(Kmat):
        # biased MMD estimate from a kernel matrix ordered as [sample1, sample2]
        return mean(Kmat[:n1,:n1])+mean(Kmat[n1:,n1:])-2*mean(Kmat[n1:,:n1])

    mmd = _mmd(K)
    null_samples = zeros(numShuffles)

    if method=='vanilla':
        for i in range(numShuffles):
            pp = permutation(merged_len)
            null_samples[i] = _mmd(K[pp,:][:,pp])
    elif method=='block':
        if numBlocks * blockSize != merged_len:
            raise ValueError("Block permutation needs the merged sample length "
                             "to be a multiple of blockSize")
        blocks=reshape(arange(merged_len),(numBlocks,blockSize))
        for i in range(numShuffles):
            pb = permutation(numBlocks)
            pp = reshape(blocks[pb],(merged_len))
            null_samples[i] = _mmd(K[pp,:][:,pp])
    elif method=='wild' or method=='wild-center':
        if n1!=n2:
            raise ValueError("Wild bootstrap MMD available only on the same sample sizes")
        alpha = exp(-1/float(blockSize))
        coreK = K[:n1,:n1]+K[n1:,n1:]-K[n1:,:n1]-K[:n1,n1:]
        for i in range(numShuffles):
            # w is a draw from the Ornstein-Uhlenbeck process
            w = HelperFunctions.generateOU(n=n1,alpha=alpha)
            if method=='wild-center':
                # empirical degeneration (V_{n,2} in Leucht & Neumann)
                w = w - mean(w)
            null_samples[i]=mean(outer(w,w)*coreK)
    elif method=='wild2':
        alpha = exp(-1/float(blockSize))
        for i in range(numShuffles):
            wx=HelperFunctions.generateOU(n=n1,alpha=alpha)
            wx = wx - mean(wx)
            wy=HelperFunctions.generateOU(n=n2,alpha=alpha)
            wy = wy - mean(wy)
            null_samples[i]=mean(outer(wx,wx)*K[:n1,:n1])+mean(outer(wy,wy)*K[n1:,n1:])-2*mean(outer(wx,wy)*K[:n1,n1:])
    else:
        raise ValueError("Unknown null approximation method")

    return sum(mmd<null_samples)/float(numShuffles)
def make_batches(data, labels=None, batch_size=100):
    """Yield ``(data_batch, label_batch)`` tuples of at most ``batch_size`` rows.

    Without ``labels`` the rows of ``data`` are visited in random order and
    the label part of every tuple is None.

    With one-hot ``labels`` (one column per class) the data is first
    rebalanced: every batch holds ``batch_size // num_labels`` randomly chosen
    rows of each class, grouped by class, and the labels are rebuilt to match.
    ``batch_size`` is capped at ``min class size * number of classes``.

    Args:
        data: 2-D array or scipy sparse matrix, one sample per row.
        labels: optional one-hot label matrix with one row per sample.
        batch_size: requested number of rows per batch (must be positive).

    Raises:
        ValueError: if ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        # a non-positive step would never advance the batching loop
        raise ValueError("batch_size must be positive")

    if labels is not None:
        num_labels = labels.shape[1]
        cls_data = [data[np.flatnonzero(labels[:, i] == 1)] for i in range(num_labels)]
        cls_sizes = [d.shape[0] for d in cls_data]
        cls_sels = [permutation(s) for s in cls_sizes]
        n = min(cls_sizes) * len(cls_sizes)
        if n == 0:
            # at least one class has no samples: no balanced batch exists
            return
        batch_size = min(n, batch_size)
        # floor division: these are counts and slice bounds
        lpb = batch_size // num_labels
        num_batches = n // batch_size
        new_dat = []
        for i in range(num_batches):
            for sel, cd in zip(cls_sels, cls_data):
                new_dat.append(cd[sel[i * lpb:(i + 1) * lpb]])
        if sparse.issparse(data):
            data = sparse.vstack(new_dat).tocsr()
        else:
            data = np.vstack(new_dat)
        labels = np.tile(np.repeat(np.eye(num_labels), lpb, 0), (num_batches, 1))
        n = len(labels)
        # the rebalanced data is already in batch order
        perm = np.arange(n)
    else:
        n = data.shape[0]
        perm = permutation(n)

    for start in range(0, n, batch_size):
        batch = perm[start:start + batch_size]
        yield (data[batch], None) if labels is None else (data[batch], labels[batch])
def train_svm(self, X, y, C, iter_max):
    """Train SVM using SMO algorithm

    Alternates between passes over all examples and passes over the
    non-bound examples (0 != alpha != C), visiting them in random order, and
    appends objective values to ``self.loglist`` along the way.

    Args:
        X (numpy.array): Design Matrix
        y (numpy.array): Response Vector {-1, 1}
        C (float): penalty parameter
        iter_max (int): maximum number of iterations

    Returns:
        (numpy.array, list): (alpha_array, b), trained parameters
    """
    # NOTE(review): this block was recovered from a whitespace-collapsed
    # source. The nesting of the logging / iteration-counter lines and of the
    # "n / 10" early stop is a best guess - confirm against the original file.
    n, p = X.shape
    # linear kernel (Gram) matrix
    K = dot(X, X.T)
    alpha_array = zeros(n)
    # b is a one-element list, presumably so examine_example can update it in place
    b = [0]
    num_changed = 0
    examine_all = True
    while num_changed > 0 or examine_all:
        num_changed = 0
        if examine_all:
            # pass over every example, in random order
            alpha_index = permutation(range(n))
            for i2 in alpha_index:
                if self.examine_example(i2, alpha_array, X, y, C, b, K):
                    num_changed += 1
                    self.loglist.append(
                        self.calc_objective_fast(alpha_array, y, K))
                    iter_max -= 1
                    if iter_max < 0:
                        break
            # iteration budget exhausted: leave the outer loop as well
            if iter_max < 0:
                break
        else:
            # pass over the non-bound examples only, in random order
            alpha_index_nonbound = [i for i in range(n)
                                    if alpha_array[i] != 0 and alpha_array[i] != C]
            alpha_index_nonbound = permutation(alpha_index_nonbound)
            for i2 in alpha_index_nonbound:
                if self.examine_example(i2, alpha_array, X, y, C, b, K):
                    num_changed += 1
                    self.loglist.append(
                        self.calc_objective_fast(alpha_array, y, K))
                    iter_max -= 1
                    if iter_max < 0:
                        break
            if iter_max < 0:
                break
        # stop if the number of changed alphas are less than n / 10
        if num_changed < n / 10:
            break
        if examine_all:
            examine_all = False
        elif num_changed == 0:
            examine_all = True
    return (alpha_array, b)
def DivideDataRandom(dataByClass, numDivs, numClasses, dividedData, dividedClasses):
    """Distribute the samples of every class randomly over ``numDivs`` divisions.

    Each division receives ``len(dataByClass[j]) // numDivs`` samples of class
    ``j``; left-over samples are not used. Results are appended in place.

    Args:
        dataByClass: indexable by class id; each entry is a sequence of samples.
        numDivs: number of divisions to fill.
        numClasses: number of classes to process (class ids 0..numClasses-1).
        dividedData: list of ``numDivs`` lists that receive the samples.
        dividedClasses: list of ``numDivs`` lists that receive the class ids.
    """
    sampleSize = [len(dataByClass[j]) // numDivs for j in range(numClasses)]
    print(sampleSize)

    # ``random.permutation`` returns a shuffled copy; the original code threw
    # that copy away, so nothing was randomised. Draw a random visiting order
    # per class instead, which also leaves the caller's data untouched.
    orders = [random.permutation(len(dataByClass[j])) for j in range(numClasses)]

    for i in range(numDivs):
        for j in range(numClasses):
            start = sampleSize[j] * i
            for k in orders[j][start:start + sampleSize[j]]:
                dividedData[i].append(dataByClass[j][k])
                dividedClasses[i].append(j)
def random_classification_noise(labels, frac_flip, symm_flip):
    """Generate random classification noise by flipping a proportion of
    labels randomly.

    ``labels`` is modified in place; nothing is returned.

    Args:
        labels: numpy array of labels; flipping multiplies an entry by -1.
        frac_flip: fraction used to size the random selection; nothing happens
            unless it is greater than 0.
        symm_flip: if true, ``round(frac_flip * n)`` randomly selected labels
            are flipped regardless of sign. Otherwise ``round(2 * frac_flip *
            n)`` labels are selected and only the positive ones among them
            are flipped.
    """
    if frac_flip > 0.0:
        num_ex = len(labels)
        if symm_flip:
            # int(): round() returns a float on Python 2, which is not a valid
            # slice bound
            num_flip = int(round(frac_flip * num_ex))
            flip_idx = permutation(num_ex)[:num_flip]
        else:
            num_candidates = int(round(2.0 * frac_flip * num_ex))
            candidates = permutation(num_ex)[:num_candidates]
            # keep the candidate positions (indices into ``labels``) whose
            # label is positive; negative labels are never flipped here
            flip_idx = candidates[labels[candidates] > 0.0]
        labels[flip_idx] = -1.0 * labels[flip_idx]
def BalancedKFold(y, n_folds=3, n_iter=1, indices=None, shuffle=False, random_state=None):
    """Return class-balanced cross validation folds.

    Runs StratifiedKFold on the integer-encoded labels ``n_iter`` times and
    subsamples every resulting train/test split so that each class appears
    equally often in it.

    Returns a list of ``(train_indices, test_indices)`` array pairs, one per
    fold and iteration.

    Raises ValueError if the least populated class is too small to fill the
    training part of a fold.

    The ``indices``, ``shuffle`` and ``random_state`` arguments are not used
    by the code below.
    """
    y = asarray(y)
    n_samples = y.shape[0]
    # y_inv holds, for every sample, the index of its label in unique_labels
    unique_labels, y_inv = unique(y, return_inverse=True)
    n_classes = len(unique_labels)
    label_counts = bincount(y_inv)
    min_labels = min(label_counts)
    # per-class quotas, derived from the least populated class
    test_per_fold = floor(min_labels/n_folds)
    total_test = test_per_fold * n_classes
    train_per_fold = test_per_fold * (n_folds-1)
    total_train = train_per_fold * n_classes
    if train_per_fold < 1:
        raise ValueError("The least populated class has too few samples (%d) to "
                         "use %d-fold cross validation!" % (min_labels, n_folds))
    # Perform regular, stratified cross validation, but subsample all class
    # labels to even depth
    folds = []
    for t in xrange(n_iter):
        # NOTE(review): StratifiedKFold is called with (labels, n_folds) and
        # iterated directly - confirm this matches the installed API.
        for (training, testing) in StratifiedKFold(y_inv, n_folds):
            train = []
            test = []
            # visit candidates in random order so the subsample is random
            training = permutation(training)
            testing = permutation(testing)
            saved = 0
            counts = zeros(n_classes)
            for i in training:
                # take the sample only while its class is below the quota
                if counts[y_inv[i]] < train_per_fold:
                    train.append(i)
                    counts[y_inv[i]] += 1
                    saved += 1
                    if saved >= total_train:
                        break
            saved = 0
            counts = zeros(n_classes)
            for i in testing:
                if counts[y_inv[i]] < test_per_fold:
                    test.append(i)
                    counts[y_inv[i]] += 1
                    saved += 1
                    if saved >= total_test:
                        break
            folds.append((asarray(train), asarray(test)))
    return folds
'''
def _generate_x(self, shuffle_experiment_order=False, **kwargs):
    """Populate ``self.x`` from ``self.data`` and extend ``self.x_shuffled``.

    ``self.x`` becomes a copy of ``self.data`` (x_i contains all gene
    expressions 1,...,n for experiment i). For every entry of ``self.x`` a
    randomly permuted version is appended to ``self.x_shuffled``. With
    ``shuffle_experiment_order`` both collections are additionally reordered
    with one shared random order and stored as lists.
    """
    self.x = self.data.copy()

    # shuffled counterpart, used to check importance of signal in actual data
    self.x_shuffled.extend(npr.permutation(entry) for entry in self.x)

    if not shuffle_experiment_order:
        return

    order = npr.permutation(list(range(len(self.x))))
    reordered, reordered_shuffled = [], []
    for position in order:
        reordered.append(self.x[position])
        reordered_shuffled.append(self.x_shuffled[position])
    self.x = reordered
    self.x_shuffled = reordered_shuffled
def drop_samples(game, prob):
    """Drop samples from a sample game

    Samples are dropped independently with probability prob.

    For every payoff block the profiles are shuffled, a binomial draw decides
    how many samples each profile keeps, and profiles are regrouped by that
    count. Profiles that keep no samples are left out of the result.
    """
    kept = {}
    blocks = zip(np.split(game.profiles, game.sample_starts[1:]),
                 game.sample_payoffs)
    for block_profs, block_pays in blocks:
        num_profiles, _, num_samples = block_pays.shape
        order = rand.permutation(num_profiles)
        block_profs = block_profs[order]
        block_pays = block_pays[order]

        draws = rand.binomial(num_samples, prob, num_profiles)
        new_samples, counts = np.unique(draws, return_counts=True)
        # group sizes become split points over the shuffled profiles
        splits = counts[:-1].cumsum()
        groups = zip(new_samples,
                     np.split(block_profs, splits),
                     np.split(block_pays, splits))
        for num, prof_samp, pay_samp in groups:
            if num != 0:
                prof_bucket, pay_bucket = kept.setdefault(num, ([], []))
                prof_bucket.append(prof_samp)
                # keep only the first ``num`` samples along the last axis
                pay_bucket.append(pay_samp[..., :num])

    if kept:
        profiles = np.concatenate(
            [part for bucket in kept.values() for part in bucket[0]], 0)
        sample_payoffs = tuple(
            np.concatenate(bucket[1]) for bucket in kept.values())
    else:
        # No data
        profiles = np.empty((0, game.num_role_strats), dtype=int)
        sample_payoffs = []

    return rsgame.samplegame_copy(game, profiles, sample_payoffs, False)
def __init__(self, data_dir, feature_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
    """Set up a provider that serves features in (optionally shuffled) batches.

    Args:
        data_dir, feature_range, init_epoch, init_batchnum, dp_params, test:
            forwarded unchanged to ``DataProvider.__init__``.
        feature_range: sequence of feature indices served by this provider.
        dp_params: must contain 'shuffle_data' and 'batch_size'; may contain
            'external_meta_path', whose unpickled entries are merged into
            ``self.batch_meta``. The dict is only read here, never modified.
        test: True for a test provider; test data is left unshuffled when
            ``dp_params['shuffle_data']`` is 0.

    Raises:
        BasicDataProviderError: if the batch size is not in
            ``1..len(feature_range)``.
    """
    DataProvider.__init__(self, data_dir, feature_range, init_epoch, init_batchnum, dp_params, test)
    self.shuffle_data = dp_params['shuffle_data'] # determine whether to shuffle test data
    if dp_params.get('external_meta_path'):
        import iread.myio as mio
        ext_meta = mio.unpickle(dp_params['external_meta_path'])
        # print(...) with a single argument behaves the same on Python 2 and 3
        print('Print load external_meta for %s succussfully' % dp_params['external_meta_path'])
        for item in ext_meta:
            self.batch_meta[item] = ext_meta[item]
            print('----Load %s from ext_meta succussfully' % item)
        del ext_meta
    self.test = test
    self.feature_range = np.asarray(feature_range)
    self.num_feature = len(feature_range)
    self.batch_size = dp_params['batch_size']
    self.keep_data_dic = False
    if self.batch_size > self.num_feature or self.batch_size <= 0:
        raise BasicDataProviderError('Invaid batch_size %d (num_image=%d)' % (self.batch_size, self.num_feature))
    # ceil(num_feature / batch_size); floor division keeps it an integer
    self.num_batch = (self.num_feature - 1) // self.batch_size + 1
    self.batch_range = range(self.num_feature)
    if self.curr_batchnum not in self.batch_range:
        self.curr_batchnum = 0
    self.curr_batchnum = min(max(self.curr_batchnum, 0), self.num_feature - 1)
    self.batch_idx = self.curr_batchnum
    if test and self.shuffle_data == 0:
        # There is no need to shuffle testing data
        self.shuffled_feature_range = self.feature_range
    else:
        self.shuffled_feature_range = self.feature_range[rd.permutation(self.num_feature)]
    self.num_feature_type = len(self.batch_meta['feature_dim'])
    self.feature_dim = self.batch_meta['feature_dim']
def kde_entropy_sklearn(points, n_est=None):
    """
    Estimate entropy from the log density returned by sklearn_log_density.

    *points* is an (n, d) array. When *n_est* is given and smaller than n, a
    random subset of *n_est* rows is used both to fit and to evaluate the
    density; otherwise the full set is used. The result is the negative mean
    log density divided by LN2.

    Original notes: data is standardized before analysis; fails for bimodal
    and dirichlet, similar to statsmodels kde.
    """
    n, d = points.shape

    # Default to the full set; otherwise reduce the draw to n_est rows
    if n_est is None or n_est >= n:
        x = points
    else:
        x = points[permutation(n)[:n_est]]

    #logp = sklearn_log_density(points, evaluation_points=n_est)
    logp = sklearn_log_density(x, evaluation_points=x)
    return -np.mean(logp) / LN2
def shuffle_data(train_data, train_target):
    """Shuffle samples and targets with one shared random permutation.

    Both inputs are converted to numpy arrays; the returned pair keeps every
    sample aligned with its target.
    """
    # shuf_train_data_list, shuf_train_target_list = shuffle_data(train_data_list, train_target_list)
    ###{{{
    data, target = np.array(train_data), np.array(train_target)
    order = permutation(len(target))
    return data[order], target[order]
def initParameter(ini):
    '''
    Fill in the derived fields of an ini object and save it.

    Expected input layout of the ini file ('segment.ini'):
        [file]
        datadir = ...
        savedir = ...
        [parameter]
        repeat = ...

    Creates savedir when missing, stores the file names found in datadir,
    builds a presentation sequence made of one random permutation of the file
    indices per repeat, resets the current position to 0 and saves.

    Returns (list of file names, repeat count).
    '''
    save_dir = ini.file.savedir
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    names = os.listdir(ini.file.datadir)
    ini.file.names = names

    repeats = ini.parameter.repeat
    total = len(names)

    ini.parameter.sequence = []
    for _ in range(repeats):
        ini.parameter.sequence += permutation(total).tolist()
    ini.parameter.current = 0

    ini.save()
    return names, repeats
def plot_predictions(self):
    """Plot a 2x4 grid of test images, each with a bar chart of its top labels.

    When ``self.only_errors`` is set, the whole batch is run and up to eight
    randomly chosen misclassified images are shown; otherwise eight randomly
    drawn images are run and shown.
    """
    # NOTE(review): recovered from a whitespace-collapsed source; nesting was
    # reconstructed from syntax - confirm against the original file.
    data = self.get_next_batch(train=False)[2] # get a test batch
    num_classes = self.test_data_provider.get_num_classes()
    NUM_ROWS = 2
    NUM_COLS = 4
    NUM_IMGS = NUM_ROWS * NUM_COLS
    NUM_TOP_CLASSES = min(num_classes, 4) # show this many top labels
    label_names = self.test_data_provider.batch_meta['label_names']
    if self.only_errors:
        # one prediction row per case in the batch (cases are along axis 1)
        preds = n.zeros((data[0].shape[1], num_classes), dtype=n.single)
    else:
        preds = n.zeros((NUM_IMGS, num_classes), dtype=n.single)
        # draw NUM_IMGS random cases (with replacement) and keep only those
        rand_idx = nr.randint(0, data[0].shape[1], NUM_IMGS)
        print rand_idx
        data[0] = n.require(data[0][:,rand_idx], requirements='C')
        data[1] = n.require(data[1][:,rand_idx], requirements='C')
    # the prediction buffer travels with the data; presumably the model
    # writes its outputs into it - TODO confirm
    data += [preds]
    temp = data[0]
    # debugging output
    print data
    print temp.ndim,temp.shape,temp.size
    # Run the model
    self.libmodel.startFeatureWriter(data, self.sotmax_idx)
    self.finish_batch()
    fig = pl.figure(3)
    fig.text(.4, .95, '%s test case predictions' % ('Mistaken' if self.only_errors else 'Random'))
    if self.only_errors:
        err_idx = nr.permutation(n.where(preds.argmax(axis=1) != data[1][0,:])[0])[:NUM_IMGS] # what the net got wrong
        data[0], data[1], preds = data[0][:,err_idx], data[1][:,err_idx], preds[err_idx,:]
    data[0] = self.test_data_provider.get_plottable_data(data[0])
    for r in xrange(NUM_ROWS):
        for c in xrange(NUM_COLS):
            img_idx = r * NUM_COLS + c
            # fewer images than grid cells: stop filling this row
            if data[0].shape[0] <= img_idx:
                break
            # image goes in the upper cell of this grid position
            pl.subplot(NUM_ROWS*2, NUM_COLS, r * 2 * NUM_COLS + c + 1)
            pl.xticks([])
            pl.yticks([])
            try:
                img = data[0][img_idx,:,:,:]
            except IndexError:
                # maybe greyscale?
                img = data[0][img_idx,:,:]
            pl.imshow(img, interpolation='nearest')
            true_label = int(data[1][0,img_idx])
            # (score, name) pairs of the highest-scoring classes, ascending
            img_labels = sorted(zip(preds[img_idx,:], label_names), key=lambda x: x[0])[-NUM_TOP_CLASSES:]
            # bar chart goes in the cell directly below the image
            pl.subplot(NUM_ROWS*2, NUM_COLS, (r * 2 + 1) * NUM_COLS + c + 1, aspect='equal')
            ylocs = n.array(range(NUM_TOP_CLASSES)) + 0.5
            height = 0.5
            width = max(ylocs)
            # the bar of the true label is drawn in red, all others in blue
            pl.barh(ylocs, [l[0]*width for l in img_labels], height=height, \
                    color=['r' if l[1] == label_names[true_label] else 'b' for l in img_labels])
            pl.title(label_names[true_label])
            pl.yticks(ylocs + height/2, [l[1] for l in img_labels])
            pl.xticks([width/2.0, width], ['50%', ''])
            pl.ylim(0, ylocs[-1] + height*2)
def read_and_normalize_and_shuffle_train_data(img_rows, img_cols, color_type=1):
    """Load (or restore from cache) the training set, normalise and shuffle it.

    Args:
        img_rows: image height used for loading and for the cache file name.
        img_cols: image width used for loading and for the cache file name.
        color_type: 1 reshapes the data to a single channel; any other value
            moves the channel axis of the loaded data to position 1.

    Returns:
        (train_data, train_target, driver_id, unique_drivers): float32 image
        array with per-channel means subtracted, one-hot targets with 10
        classes, and the driver information as loaded.
    """
    cache_path = os.path.join('cache', 'train_r_' + str(img_rows) + '_c_' + str(img_cols) + '_t_' + str(color_type) + '.dat')
    if not os.path.isfile(cache_path) or use_cache == 0:
        train_data, train_target, driver_id, unique_drivers = \
            load_train(img_rows, img_cols, color_type)
        cache_data((train_data, train_target, driver_id, unique_drivers), cache_path)
    else:
        print('Restore train from cache!')
        (train_data, train_target, driver_id, unique_drivers) = \
            restore_data(cache_path)

    train_data = np.array(train_data, dtype=np.uint8)
    train_target = np.array(train_target, dtype=np.uint8)
    if color_type == 1:
        train_data = train_data.reshape(train_data.shape[0], color_type, img_rows, img_cols)
    else:
        train_data = train_data.transpose((0, 3, 1, 2))
    train_target = np_utils.to_categorical(train_target, 10)
    train_data = train_data.astype('float32')

    mean_pixel = [103.939, 116.779, 123.68]
    # Subtract a mean only for channels that exist: the single-channel layout
    # built for color_type == 1 has no channels 1 and 2 to index.
    # NOTE(review): for single-channel data this subtracts mean_pixel[0] -
    # confirm that is the intended grey-level mean.
    for c in range(min(train_data.shape[1], len(mean_pixel))):
        train_data[:, c, :, :] = train_data[:, c, :, :] - mean_pixel[c]
    # train_data /= 255

    perm = permutation(len(train_target))
    train_data = train_data[perm]
    train_target = train_target[perm]
    # NOTE(review): driver_id is returned in its loaded order while data and
    # targets are shuffled - confirm callers do not pair it with the rows.
    print('Train shape:', train_data.shape)
    print(train_data.shape[0], 'train samples')
    return train_data, train_target, driver_id, unique_drivers
def __init__(self, num_folds, inputs, targets=None, permute=True):
    """Split ``inputs`` (and ``targets``) into ``num_folds`` contiguous folds.

    Args:
        num_folds: number of folds to create.
        inputs: array with one sample per row.
        targets: optional array aligned with ``inputs``.
        permute: when True, work on a randomly permuted copy of the data;
            ``self.ordering`` records the permutation that was applied.

    After construction ``self.indices[i]`` holds the row indices of fold i and
    ``self.folds[i]`` is a ``Fold`` built from the indices of all other folds
    (training) and of fold i (held out).
    """
    if permute:
        # Make a copy of the data, with a random permutation.
        self.ordering = npr.permutation(inputs.shape[0])
        self.inputs = inputs[self.ordering,...].copy()
        if targets is not None:
            self.targets = targets[self.ordering,...].copy()
        else:
            self.targets = None
    else:
        self.ordering = np.arange(inputs.shape[0], dtype=int)
        self.inputs = inputs
        self.targets = targets

    self.fold_idx = 0
    self.num_folds = num_folds
    # fold boundaries, evenly spaced over the number of samples
    self.edges = np.linspace(0, self.inputs.shape[0], self.num_folds+1).astype(int)

    self.indices = [np.arange(self.edges[ii], self.edges[ii+1], dtype=int)
                    for ii in range(self.num_folds)]

    self.folds = []
    for ii in range(self.num_folds):
        # every fold except ii; a filtered comprehension works on Python 2 and
        # 3, unlike concatenating two ``range`` objects with ``+``
        others = [self.indices[jj] for jj in range(self.num_folds) if jj != ii]
        train_idx = np.array(list(itertools.chain.from_iterable(others)), dtype=int)
        self.folds.append(Fold(self, train_idx, np.array(self.indices[ii], dtype=int)))
def generate(self):
    """Create ``self.size`` nodes and connect them with random edges.

    For every node a target edge count is drawn; the node is then connected
    to randomly ordered other nodes it is not yet connected to until that
    many new edges were added or no candidates remain.
    """
    def get_edge_count(node):
        # number of edges still to add: a random base (plus an optional
        # spike), capped at size-1, minus the edges the node already has
        base = rand.random_integers(self.edges_low, self.edges_high)
        return min(base + condition(self.edges_spike, self.spike_prob), self.size-1) - node.degree()

    def create_edge(from_node, to_node):
        # record the edge and register it on both end points
        self.edge.append(Edge(from_node, to_node))
        from_node.connect(to_node)
        to_node.connect(from_node)

    # start here
    # ``range`` instead of ``xrange`` so the method also runs on Python 3
    for i in range(self.size):
        self.node.append(Node(i))

    for _from in range(self.size):
        from_node = self.node[_from]
        n = get_edge_count(from_node)
        if n > 0:
            connections = 0
            for _to in rand.permutation(self.size):
                to_node = self.node[_to]
                if _to != _from and not from_node.is_connected(to_node):
                    create_edge(from_node, to_node)
                    connections += 1
                    if connections == n:
                        break
def trte_split(X, Y, tr_frac):
    """Split the data in X/Y into training and testing portions.

    Rows are assigned at random; ``round(tr_frac * rows)`` of them go to the
    training portion and the rest to the testing portion.

    Args:
        X: observations, one per row (garray or anything np.array accepts).
        Y: outputs aligned with X by row.
        tr_frac: fraction of rows to use for training, between 0 and 1.

    Returns:
        [Xtr, Ytr, Xte, Yte] as garrays.

    Raises:
        ValueError: if ``tr_frac`` gives a training count outside
            ``0..number of rows``.
    """
    X = X.as_numpy_array() if gp.is_garray(X) else np.array(X)
    Y = Y.as_numpy_array() if gp.is_garray(Y) else np.array(Y)

    obs_count = X.shape[0]
    # int(): round() returns a float on Python 2, which is not a valid size
    tr_count = int(round(tr_frac * obs_count))
    if not 0 <= tr_count <= obs_count:
        raise ValueError("tr_frac must give a training count between 0 and "
                         "the number of observations")

    idx = npr.permutation(obs_count)
    tr_idx, te_idx = idx[:tr_count], idx[tr_count:]

    # Fancy indexing copies the selected rows in one step; the float
    # conversion matches the float buffers the row-by-row copy used.
    Xtr = np.asarray(X[tr_idx], dtype=float)
    Ytr = np.asarray(Y[tr_idx], dtype=float)
    Xte = np.asarray(X[te_idx], dtype=float)
    Yte = np.asarray(Y[te_idx], dtype=float)
    return [gp.garray(Xtr), gp.garray(Ytr), gp.garray(Xte), gp.garray(Yte)]
def bootstrap(self):
    """Return a random 60% subset of the data with the matching labels.

    The subset is drawn without replacement: the first ``floor(0.6 * n)``
    entries of a random permutation of all sample indices are used.
    """
    # TODO: Discuss this implementation: do we want a random permutation over the first 60% of the dataset,
    # TODO: or rather take random elements from the dataset, resulting in a 60% coverage on average?
    sample_count = self.number_of_samples()
    keep = int(np.floor(sample_count * 0.6))
    chosen = rnd.permutation(sample_count)[:keep]
    picked_data = self._data()[chosen]
    picked_labels = self._labels()[chosen]
    return picked_data, picked_labels
def read_and_normalize_train_data():
    """Load the training set, convert it to float32 and shuffle it.

    The loaded images are moved to channel-first layout, a fixed per-channel
    mean is subtracted from the first three channels, the targets are one-hot
    encoded with 8 classes, and data and targets are shuffled with one shared
    random permutation.

    Returns (train_data, train_target, train_id); train_id is passed through
    as loaded.
    """
    train_data, train_target, train_id = load_train()

    print('Convert to numpy...')
    train_data = np.array(train_data, dtype=np.uint8)
    train_target = np.array(train_target, dtype=np.uint8)

    print('Reshape...')
    train_data = train_data.transpose((0, 3, 1, 2))

    print('Convert to float...')
    train_data = train_data.astype('float32')

    mean_pixel = [103.939, 116.779, 123.68]
    progress = ('Substract 0...', 'Substract 1...', 'Substract 2...')
    for channel, (message, offset) in enumerate(zip(progress, mean_pixel)):
        print(message)
        train_data[:, channel, :, :] -= offset

    train_target = np_utils.to_categorical(train_target, 8)

    # Shuffle experiment START !!!
    order = permutation(len(train_target))
    train_data, train_target = train_data[order], train_target[order]
    # Shuffle experiment END !!!

    print('Train shape:', train_data.shape)
    print(train_data.shape[0], 'train samples')
    return train_data, train_target, train_id
def subset(L, cardinality):
    """Return ``cardinality`` randomly chosen, distinct columns of the 2-D array L.

    The columns come back in random order; asking for more columns than L has
    returns all of them.
    """
    _, column_count = L.shape
    shuffled_columns = permutation(arange(column_count))
    return L[:, shuffled_columns[:cardinality]]
def resolve(self):
    """Let the ants act in random order until the partition is balanced.

    Loops until ``self.balanced()`` or ``self.force_end()`` is true, counting
    iterations in ``self.time`` and reporting progress through
    ``self.verbose`` every 100000 iterations.
    """
    # NOTE(review): recovered from a whitespace-collapsed source. Whether the
    # time counter advances once per sweep or once per ant, and where the
    # prev_uncontrolled / tmark updates sit, is a best guess - confirm against
    # the original file.
    t0 = time()
    self.time = 0
    while not self.balanced() and not self.force_end():
        # one sweep: every ant acts once, in random order
        for a_id in permutation(self.nb_ants):
            if self.ants[a_id].action(self.time):
                continue
        self.time += 1
        if self.time % 100000 == 0:
            # periodic progress report
            self.verbose(1, 'Time: %d' % self.time)
            self.verbose(1, 'Nb of uncontrolled sites: %d' % self.uncontrolled)
            sizes = [str(a.area_size) for a in self.ants]
            self.verbose(1, 'Territories: %s' % ','.join(sizes))
            if self.uncontrolled > 0:
                # progress since the previous report
                delta_t = time() - self.tmark
                duration = format_duration(delta_t)
                delta_site = self.prev_uncontrolled - self.uncontrolled
                self.verbose(1, '%d sites taken in %s' % (delta_site, duration))
                self.total_time += delta_t
                # average sites taken per second so far, used to extrapolate
                speed = (self.nb_sites - self.uncontrolled) / \
                    self.total_time
                expected_duration = self.uncontrolled / speed
                self.verbose(1, '%s expected for complete control' %
                             (format_duration(expected_duration)))
            # reference point for the next report
            self.prev_uncontrolled = self.uncontrolled
            self.tmark = time()
    self.verbose(1, 'Balloon partitioning done, took %s, %d iterations' %
                 (format_duration(time() - t0), self.time))
def preprocess(data):
    """
    Log-transforms, centers and symmetrically whitens data.

    The columns (data points) are also put into random order.

    @type  data: array_like
    @param data: data points stored in columns

    @rtype: ndarray
    @return: whitened data, same shape as the input
    """
    from numpy import atleast_2d
    from numpy.linalg import eigh

    # log-transform
    data = log(data + 1.)

    # center
    data = data - mean(data, 1).reshape(-1, 1)

    # shuffle
    data = data[:, permutation(data.shape[1])]

    # find eigenvectors; the covariance matrix is symmetric, so use the
    # symmetric solver, which returns real values and orthonormal vectors.
    # atleast_2d: a single row gives a 0-d covariance.
    eigvals, eigvecs = eigh(atleast_2d(cov(data)))

    # eliminate eigenvectors whose eigenvalues are zero
    keep = eigvals > 0
    eigvecs = eigvecs[:, keep]
    eigvals = eigvals[keep]

    # symmetric whitening matrix
    whitening_matrix = dot(eigvecs, dot(diag(1. / sqrt(eigvals)), eigvecs.T))

    # whiten data
    return dot(whitening_matrix, data)
def validation_data(self, folds):
    """
    Run a k-fold cross validation over ``self.data_frame``.

    The row positions are randomly permuted and cut into ``folds`` chunks;
    each chunk in turn is held out while a classifier is trained on the rest.

    :param folds: number of folds
    :return: list of [prediction, expected] pairs, one per fold
    """
    df = self.data_frame
    assert len(df) > folds

    chunks = array_split(permutation(len(df)), folds)
    response = []
    for held_out, test_idx in enumerate(chunks):
        # training rows are all chunks except the held-out one
        train_idx = concatenate(
            [chunk for pos, chunk in enumerate(chunks) if pos != held_out])

        training = df.iloc[train_idx]
        test_data = df.iloc[test_idx]

        classifier = self.train(training[self.features],
                                self.__factorize(training))
        predictions = classifier.predict(test_data[self.features])
        expected = self.__factorize(test_data)
        response.append([predictions, expected])
    return response
def kde_entropy_statsmodels(points, n_est=None):
    """
    Estimate entropy from a statsmodels KDEMultivariate density.

    *points* is an (n, d) array. When *n_est* is given and smaller than n, a
    random subset of *n_est* rows is used; otherwise the full set is used.
    The density is evaluated at the sample points and the result is the
    negative mean log density divided by LN2.

    Original notes: slow and fails for bimodal, dirichlet; poor for high
    dimensional MVN.
    """
    from statsmodels.nonparametric.kernel_density import KDEMultivariate

    n, d = points.shape

    # Default to the full set; otherwise reduce the draw to n_est rows
    if n_est is None or n_est >= n:
        x = points
    else:
        x = points[permutation(n)[:n_est]]

    # one continuous ('c') variable per column
    predictor = KDEMultivariate(data=x, var_type='c'*d)
    density = predictor.pdf()
    return -np.mean(log(density)) / LN2
def main(args):
    """Load a graph dataset from .npy files and train the selected model.

    Reads edge endpoints (u.npy, v.npy), node features (x.npy) and node
    labels (y.npy) from the working directory, builds random train/val/test
    masks sized by ``args.n_train`` and ``args.n_val``, prints dataset
    statistics and dispatches on ``args.model``.
    """
    # load and preprocess dataset
    # data = load_data(args)
    u, v, x, y = (np.load(name) for name in ('u.npy', 'v.npy', 'x.npy', 'y.npy'))
    n = len(x)
    # adjacency from the edge list, with the diagonal set
    adj = sps.coo_matrix((np.ones_like(u), (u, v)), shape=[n, n]).maximum(sps.eye(n))
    # adj = sps.eye(n, n)

    # lightweight attribute container standing in for a dataset object
    data = type('', (), {})
    data.graph = dgl.graph_index.create_graph_index(adj, readonly=True, multigraph=False)
    data.features = x
    data.labels = y
    data.num_labels = len(np.unique(y))

    # random split: first n_train nodes train, next n_val validate, rest test
    p = npr.permutation(n)
    val_end = args.n_train + args.n_val
    split = (
        ('train_mask', p[:args.n_train]),
        ('val_mask', p[args.n_train:val_end]),
        ('test_mask', p[val_end:]),
    )
    for attr, chosen in split:
        mask = np.zeros(n)
        mask[chosen] = 1
        setattr(data, attr, mask)

    ctx = mx.gpu(args.gpu) if args.gpu >= 0 else mx.cpu()

    if args.self_loop and not args.dataset.startswith('reddit'):
        data.graph.add_edges_from([(i,i) for i in range(len(data.graph))])

    train_nid = mx.nd.array(np.nonzero(data.train_mask)[0]).astype(np.int64)
    test_nid = mx.nd.array(np.nonzero(data.test_mask)[0]).astype(np.int64)

    features = mx.nd.array(data.features)
    labels = mx.nd.array(data.labels)
    train_mask = mx.nd.array(data.train_mask)
    val_mask = mx.nd.array(data.val_mask)
    test_mask = mx.nd.array(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.sum().asscalar()
    n_val_samples = val_mask.sum().asscalar()
    n_test_samples = test_mask.sum().asscalar()

    # NOTE(review): the line breaks inside this message were lost in the
    # whitespace-collapsed source and are reconstructed - confirm.
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           n_train_samples,
           n_val_samples,
           n_test_samples))

    # create GCN model
    g = DGLGraph(data.graph, readonly=True)
    g.ndata['features'] = features
    g.ndata['labels'] = labels

    model = args.model
    if model == "mlp":
        mlp_train(ctx, args, n_classes, features, labels, train_mask, val_mask, test_mask)
    elif model == "gcn_ns":
        gcn_ns_train(g, ctx, args, n_classes, train_nid, test_nid, n_test_samples)
    elif model == "gcn_cv":
        gcn_cv_train(g, ctx, args, n_classes, train_nid, test_nid, n_test_samples, False)
    elif model == "graphsage_cv":
        graphsage_cv_train(g, ctx, args, n_classes, train_nid, test_nid, n_test_samples, False)
    else:
        print("unknown model. Please choose from gcn_ns, gcn_cv, graphsage_cv")
def split_train_test(data, test_ratio):
    """Randomly split a DataFrame into ``(train, test)`` parts.

    ``int(len(data) * test_ratio)`` randomly chosen rows form the test part;
    all remaining rows form the training part.
    """
    row_count = len(data)
    order = rnd.permutation(row_count)
    cut = int(row_count * test_ratio)
    return data.iloc[order[cut:]], data.iloc[order[:cut]]
plt.ion() air = MaterialTable.fromMaterial('air', 0.4, 1) BK7 = MaterialTable.fromMaterial('BK7', 0.4, 1) MgF2 = MaterialTable.fromMaterial('MgF2', 0.4, 1) Ta2O5 = MaterialTable.fromMaterial('Ta2O5', 0.4, 1) AlAs = MaterialTable.fromMaterial('AlAs', 0.4, 1) SiO2 = MaterialTable.fromMaterial('SiO2', 0.4, 1) default = [MgF2, Ta2O5, AlAs, SiO2] permutation_stack = [] for i in range(100): permutation_stack += list(permutation(default)) wl = _np.arange(0.4, 1, 0.005) def getThicknesses(l=0.5, layers=2): widths = [] stack = [air] + permutation_stack[:layers] + [BK7] TM = TransferMatrix(stack, [1] * layers) IM = [TM.getInterfacialMatrix(i, l, 0, True) for i in range(-1, layers)] p0 = _np.mod(_np.angle(-IM[0][1, 0] / IM[0][1, 1]), 2 * _const.pi) # phase from immediate reflection phase = _np.mod( _np.angle(1 - (IM[0][1, 0] / IM[0][1, 1])**2), 2 * _const.pi ) # Phase change of thransmission through the first interface and back for i in range(layers):
def test_spatial_searchlight(self, lrn_sllrn_SL_partitioner, do_roi=False,
                             results_backend='native'):
    """Tests both generic and ad-hoc searchlights (e.g. GNBSearchlight)

    An ad-hoc searchlight needs a ground-truth comparison against the
    generic version anyway, so every searchlight flavour is run on the same
    dataset and the results are compared with each other.

    Args:
        lrn_sllrn_SL_partitioner: 4-tuple ``(lrn, sllrn, SL, partitioner)``:
            the learner for the generic cross-validation, the learner for the
            ad-hoc searchlight (``None`` means reuse ``lrn``), the ad-hoc
            searchlight class and the partitioner.
        do_roi: If true, run only on a random subset of sphere centers and
            compare against a full reference run.
        results_backend: Passed to ``sphere_searchlight``; ``'hdf5'``
            requires ``h5py``.
    """
    lrn, sllrn, SL, partitioner = lrn_sllrn_SL_partitioner
    ## if results_backend == 'hdf5' and not common_variance:
    ##     # no need for full combination of all possible arguments here
    ##     return

    if __debug__ and 'ENFORCE_CA_ENABLED' in debug.active \
       and isinstance(lrn, ChainMapper):
        raise SkipTest("Known to fail while trying to enable "
                       "training_stats for the ChainMapper (M1NN here)")

    # e.g. for M1NN we need plain kNN(1) for m1nnsl, but to imitate m1nn
    #      "learner" we must use a chainmapper atm
    if sllrn is None:
        sllrn = lrn
    ds = datasets['3dsmall'].copy()
    # Let's test multiclass here, so boost # of labels
    ds[6:18].T += 2
    ds.fa['voxel_indices'] = ds.fa.myspace

    # To assure that users do not run into incorrect operation due to overflows
    ds.samples += 5000
    ds.samples *= 1000
    ds.samples = ds.samples.astype(np.int16)

    # compute N-1 cross-validation for each sphere
    # YOH: unfortunately sample_clf_lin is not guaranteed
    #      to provide exactly the same results due to inherent
    #      iterative process.  Therefore lets use something quick
    #      and pure Python
    cv = CrossValidation(lrn, partitioner)

    skwargs = dict(
        radius=1,
        enable_ca=['roi_sizes', 'raw_results', 'roi_feature_ids'])

    if do_roi:
        # select some random set of features
        nroi = rnd.randint(1, ds.nfeatures)
        # and lets compute the full one as well once again so we have a reference
        # which will be excluded itself from comparisons but values will be compared
        # for selected roi_id
        sl_all = SL(sllrn, partitioner, **skwargs)
        result_all = sl_all(ds)
        # select random features
        roi_ids = rnd.permutation(range(ds.nfeatures))[:nroi]
        skwargs['center_ids'] = roi_ids
    else:
        nroi = ds.nfeatures
        roi_ids = np.arange(nroi)
        result_all = None

    if results_backend == 'hdf5':
        skip_if_no_external('h5py')

    sls = [
        sphere_searchlight(cv, results_backend=results_backend, **skwargs),
        #GNBSearchlight(gnb, NFoldPartitioner(cvtype=1))
        SL(sllrn, partitioner, indexsum='fancy', **skwargs)
    ]

    if externals.exists('scipy'):
        sls += [SL(sllrn, partitioner, indexsum='sparse', **skwargs)]

    # Test nproc just once
    if externals.exists('pprocess') and not self._tested_pprocess:
        sls += [sphere_searchlight(cv, nproc=2, **skwargs)]
        self._tested_pprocess = True

    # Provide the dataset and all those searchlights for testing
    #self._test_searchlights(ds, sls, roi_ids, result_all)
    #nroi = len(roi_ids)
    #do_roi = nroi != ds.nfeatures
    all_results = []
    for sl in sls:
        # run searchlight
        mvpa2.seed()                # reseed rng again for m1nnsl
        results = sl(ds)
        all_results.append(results)
        #print `sl`
        # check for correct number of spheres
        self.assertTrue(results.nfeatures == nroi)
        # and measures (one per xfold)
        if partitioner.cvtype == 1:
            self.assertTrue(len(results) == len(ds.UC))
        elif partitioner.cvtype == 0.5:
            # here we had 4 unique chunks, so 6 combinations
            # even though 20 max was specified for NFold
            self.assertTrue(len(results) == 6)
        else:
            raise RuntimeError("Unknown yet type of partitioner to check")
        # check for chance-level performance across all spheres
        # makes sense only if number of features was big enough
        # to get some stable estimate of mean
        if not do_roi or nroi > 20:
            # was for binary, somewhat labile with M1NN
            #self.assertTrue(0.4 < results.samples.mean() < 0.6)
            self.assertTrue(0.68 < results.samples.mean() < 0.82)

            mean_errors = results.samples.mean(axis=0)
            # that we do get different errors ;)
            # The comparison used to sit inside len(), which made the
            # assertion true for any non-empty result.  It is only checked
            # when enough spheres are present for more than three distinct
            # error values to be expected.
            self.assertTrue(len(np.unique(mean_errors)) > 3)

        # check resonable sphere sizes
        self.assertTrue(len(sl.ca.roi_sizes) == nroi)
        self.assertTrue(len(sl.ca.roi_feature_ids) == nroi)
        for i, fids in enumerate(sl.ca.roi_feature_ids):
            self.assertTrue(len(fids) == sl.ca.roi_sizes[i])
        if do_roi:
            # for roi we should relax conditions a bit
            self.assertTrue(max(sl.ca.roi_sizes) <= 7)
            self.assertTrue(min(sl.ca.roi_sizes) >= 4)
        else:
            self.assertTrue(max(sl.ca.roi_sizes) == 7)
            self.assertTrue(min(sl.ca.roi_sizes) == 4)

        # check base-class state
        self.assertEqual(sl.ca.raw_results.nfeatures, nroi)

        # Test if we got results correctly for 'selected' roi ids
        if do_roi:
            assert_array_equal(result_all[:, roi_ids], results)

    if len(all_results) > 1:
        # if we had multiple searchlights, we can check either they all
        # gave the same result (they should have)
        aresults = np.array([a.samples for a in all_results])
        dresults = np.abs(aresults - aresults.mean(axis=0))
        dmax = np.max(dresults)
        self.assertTrue(dmax <= 1e-13)

    # Test the searchlight's reuse of neighbors
    indexsums = ['fancy'] + (['sparse'] if externals.exists('scipy') else [])
    for indexsum in indexsums:
        # Use the loop variable so the 'sparse' flavour is exercised too
        # (it used to be hard-coded to 'fancy' on every iteration).
        sl = SL(sllrn, partitioner, indexsum=indexsum,
                reuse_neighbors=True, **skwargs)
        mvpa2.seed()
        result1 = sl(ds)
        mvpa2.seed()
        result2 = sl(ds)            # must be faster
        assert_array_equal(result1, result2)
"""
Permutation operations of numpy's random module.

shuffle(x)
    |- shuffles the data of x directly, in place.
permutation(x)
    |- builds a new, shuffled copy from the data of x.
"""
import numpy as np
import numpy.random as rd

# shuffle: reorders `a` itself; the call has no return value (r is None).
a = np.arange(10)
r = rd.shuffle(a)
print('shuffle:', a, r)

# permutation: leaves `a` untouched and returns the shuffled result.
a = np.arange(10)
r = rd.permutation(a)
print('permutation:', a, r)
def shuffle(A, d=5):
    """Add a randomly scaled, row-permuted copy of ``A`` to ``A`` in place.

    Args:
        A: Array with at least one axis; it is modified in place.
        d: Inclusive upper bound of the random integer factor drawn for each
            row (factors lie in ``1..d``).

    Returns:
        The same object ``A`` after the in-place update.
    """
    # One integer factor per row, shaped (n, 1) so it broadcasts over columns.
    factors = randint(1, d + 1, (A.shape[0], 1))
    A += factors * permutation(A)
    return A
def runMousefunc(self, nFold=3, nTree=250, criterion="gini", density=0.1):
    """
    Train and score a random forest per GO id, once on all labels and once
    per cross-validation fold, and store the metrics in the database views.

    For every GO id the stored fold number encodes the kind of result:
    CV: -1 => total model (no cv)
    CV: 0..nFold-1 => metrics of that single fold
    CV: nFold => mean metric over cv

    nFold     -- number of cross-validation folds
    nTree     -- number of trees (``n_estimators``) of the random forest
    criterion -- split criterion passed to the random forest
    density   -- value passed as ``min_density`` to the random forest
    """
    # (Re)create the two result views; drop=True is passed to both calls.
    self.__database.createGOIDView(self.__goidtable, double=["AUROC", "AUPR", "Fmax"], drop=True)
    self.__database.createProteinView(self.__proteintable, double=["ProteinID", "Label", "Score"], \
                                      drop=True)
    # Get labels
    test = 0
    # One random protein order, shared by the folds of every GO id.
    pp = permutation(self.__numproteins)
    resultid = 0
    for goid in self.__goid:
        print "____________ GOID %d ____________" % goid
        # Get label for GOID
        goidindex = where(self.__goid == goid)
        goidindex = int(goidindex[0])
        print goidindex
        annotations = self.selectAnnotatedProteinsMousefunc(goidindex)
        print "0s=", len([x for x in annotations if x == 0])
        print "1s=", len([x for x in annotations if x == 1])
        print "-1s=", len([x for x in annotations if x == -1])
        # Copy the annotations into a flat float64 vector for the classifier.
        annotation = []
        for value in annotations:
            annotation.append(value)
        annotation = asarray(annotation).astype(float64)
        annotation = annotation.ravel()
        print annotation.shape
        #model = RandomForestClassifier(n_estimators=100, n_jobs=-1)
        # ---- total model: fit and score on the complete label vector ----
        model = RandomForestClassifier(n_estimators=nTree, criterion=criterion, compute_importances=True,\
                                       min_density=density, bootstrap=True, max_features="auto", \
                                       n_jobs=8, verbose=1)
        model.fit(self.__network, annotation)
        scores = model.predict_proba(self.__network)
        # NOTE(review): column 0 of predict_proba is used as the score;
        # confirm which class that column corresponds to.
        per = Performance(annotations, scores[:, 0])
        roc = per.AUROCGillis()
        print "AUROC= ", roc
        pr = per.AUPRGillis()
        print "AUPR= ", pr
        fmax = per.Fmax()
        print "Fmax= ", fmax
        self.__database.insertProteinView(self.__proteintable, resultid, goid[0], -1, self.__proteins, \
                                          annotations, scores[:,0])
        self.__database.insertGOIDView(self.__goidtable, resultid, goid[0], -1, [roc, pr, fmax])
        resultid += 1
        del per
        #break
        # ---- cross-validation over nFold slices of the permutation pp ----
        labelIx = range(self.__numproteins)
        offset = 0
        fold = 0
        meanroc = []
        meanpr = []
        meanfmax = []
        while fold < nFold:
            print "____________ Fold= %d ____________" % fold
            lastelem = min(self.__numproteins, offset + floor(self.__numproteins / nFold))
            ix = []
            # NOTE(review): the slice starts at offset + 1, so pp[offset] is
            # never part of any fold -- confirm whether that is intended.
            for index in pp[offset + 1:lastelem]:
                ix.append(labelIx[index])
            offset = lastelem
            # Training labels for this fold: the held-out proteins are set to 0.
            labeltmp = []
            for value in annotations:
                labeltmp.append(float(value))
            for index in ix:
                labeltmp[index] = 0
            labeltmp = asarray(labeltmp).astype(float64)
            labeltmp = labeltmp.ravel()
            print "0s=", len([x for x in labeltmp if x == 0])
            print "1s=", len([x for x in labeltmp if x == 1])
            print "-1s=", len([x for x in labeltmp if x == -1])
            model = RandomForestClassifier(n_estimators=nTree, criterion=criterion, compute_importances=True,\
                                           min_density=density, bootstrap=True, max_features="auto", \
                                           n_jobs=8, verbose=1)
            model.fit(self.__network, labeltmp)
            scores = model.predict_proba(self.__network)
            scores = scores[:, 0]
            # Evaluate only on the held-out proteins, against their true labels.
            score = []
            annotation = []
            proteins = []
            for index in ix:
                score.append(float(scores[index]))
                annotation.append(annotations[index])
                proteins.append(self.__proteins[index])
            per = Performance(annotation, score)
            roc = per.AUROCGillis()
            print "AUROC= ", roc
            meanroc.append(roc)
            pr = per.AUPRGillis()
            print "AUPR= ", pr
            meanpr.append(pr)
            fmax = per.Fmax()
            print "Fmax= ", fmax
            meanfmax.append(fmax)
            self.__database.insertGOIDView(self.__goidtable, resultid, goid[0], fold, [roc, pr, fmax])
            self.__database.insertProteinView(self.__proteintable, resultid, goid[0], fold, \
                                              proteins, annotation, score)
            del annotation
            del score
            del per
            fold += 1
            resultid += 1
        # Arithmetic means over the folds (each term is divided by the count
        # while it is summed).
        roc_mean = reduce(lambda x, y: x + y / float(len(meanroc)), meanroc, 0)
        print "Mean AUROC= ", roc_mean
        pr_mean = reduce(lambda x, y: x + y / float(len(meanpr)), meanpr, 0)
        print "Mean AUPR= ", pr_mean
        fmax_mean = reduce(lambda x, y: x + y / float(len(meanfmax)), meanfmax, 0)
        print "Mean Fmax= ", fmax_mean
        self.__database.insertGOIDView(self.__goidtable, resultid, goid[0], nFold, \
                                       [roc_mean, pr_mean, fmax_mean])
        resultid += 1
        test += 1
i += len(classes) epoch_loss = running_loss / size epoch_acc = running_corrects.data.item() / size print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc)) return predictions, all_proba, all_classes predictions, all_proba, all_classes = test_model2(model_vgg.classifier, size=dset_sizes['valid']) # Number of images to view for each visualization task n_view = 8 correct = np.where(predictions == all_classes)[0] from numpy.random import random, permutation idx = permutation(correct)[:n_view] loader_correct = torch.utils.data.DataLoader([dsets['valid'][x] for x in idx], batch_size=n_view, shuffle=True) for data in loader_correct: inputs_cor, labels_cor = data # Make a grid from batch out = torchvision.utils.make_grid(inputs_cor) imshow(out, title=[l.item() for l in labels_cor]) from IPython.display import Image, display for x in idx: display(Image(filename=dsets['valid'].imgs[x][0], retina=True))
def tran_kfolds(
    df,
    k=None,
    ft=None,
    out=None,
    var_fold=None,
    suffix="_mean",
    summaries=None,
    tf=tf_summarize,
    shuffle=True,
    seed=None,
):
    r"""Perform k-fold CV

    Perform k-fold cross-validation (CV) using a given fitting procedure (ft).
    Optionally provide a fold identifier column, or (randomly) assign folds.

    Args:
        df (DataFrame): Data to pass to given fitting procedure
        ft (gr.ft_): Partially-evaluated grama fit function; defines model
            fitting procedure and outputs to aggregate
        tf (gr.tf_): Partially-evaluated grama transform function; evaluation
            of fitted model will be passed to tf and provided with keyword
            arguments from summaries
        out (list or None): Outputs for which to compute `summaries`; None
            uses ft.out
        var_fold (str or None): Column to treat as fold identifier; overrides
            `k`
        suffix (str): Suffix for predicted value; used to distinguish between
            predicted and actual
        summaries (dict of functions): Summary functions to pass to tf; will
            be evaluated for outputs of ft. Each summary must have signature
            summary(f_pred, f_meas). Grama includes builtin options: gr.mse,
            gr.rmse, gr.rel_mse, gr.rsq, gr.ndme
        k (int): Number of folds; k=5 to k=10 recommended [1]
        shuffle (bool): Shuffle the data before CV? True recommended [1]
        seed (int or None): Random seed set before shuffling; None leaves the
            random state untouched. Only used when `shuffle` is True and
            `var_fold` is None.

    Notes:
        - Many grama functions support *partial evaluation*; this allows one
          to specify things like hyperparameters in fitting functions without
          providing data and executing the fit. You can take advantage of this
          functionality to easily do hyperparameter studies.

    Returns:
        DataFrame: Aggregated results within each of k-folds using given model
        and summary transform

    Raises:
        ValueError: If `ft` is not given, or `var_fold` is not a column of
            `df`.

    References:
        [1] James, Witten, Hastie, and Tibshirani, "An introduction to
        statistical learning" (2017), Chapter 5. Resampling Methods

    Examples::

        import grama as gr
        from grama.data import df_stang
        from grama.fit import ft_rf
        df_kfolds = (
            df_stang
            >> gr.tf_kfolds(
                k=5,
                ft=ft_rf(out=["thick"], var=["E", "mu"]),
            )
        )

    """
    ## Check invariants
    if ft is None:
        raise ValueError("Must provide ft keyword argument")
    if (k is None) and (var_fold is None):
        print("... tran_kfolds is using default k=5")
        k = 5
    if summaries is None:
        print("... tran_kfolds is using default summaries mse and rsq")
        summaries = dict(mse=mse, rsq=rsq)

    n = df.shape[0]
    ## Handle custom folds
    if var_fold is not None:
        ## Check for a valid var_fold
        if var_fold not in df.columns:
            raise ValueError("var_fold must be in df.columns or None")
        ## Build folds: one fold per distinct level of the identifier column
        levels = unique(df[var_fold])
        k = len(levels)
        print("... tran_kfolds found {} levels via var_folds".format(k))
        Is = [list(arange(n)[df[var_fold] == level]) for level in levels]

    else:
        ## Shuffle data indices
        if shuffle:
            # `is not None` so that seed=0 is honored as well
            if seed is not None:
                set_seed(seed)
            I = permutation(n)
        else:
            I = arange(n)
        ## Build folds: consecutive chunks of ceil(n / k) indices
        di = int(ceil(n / k))
        Is = [I[i * di : min((i + 1) * di, n)] for i in range(k)]

    ## Iterate over folds
    # NOTE(review): folds hold positions 0..n-1 but are matched against
    # X.index below; this presumes df carries a default integer index.
    df_res = DataFrame()
    for i in range(k):
        ## Train by out-of-fold data
        md_fit = df >> tf_filter(~var_in(X.index, Is[i])) >> ft

        ## Determine predicted and actual
        if out is None:
            out = str_replace(md_fit.out, suffix, "")
        else:
            out = str_replace(out, suffix, "")

        ## Test by in-fold data
        df_pred = md_fit >> ev_df(
            df=df >> tf_filter(var_in(X.index, Is[i])), append=False
        )

        ## Specialize summaries for output names
        summaries_all = ChainMap(
            *[
                {
                    key + "_" + o: fun(X[o + suffix], X[o])
                    for key, fun in summaries.items()
                }
                for o in out
            ]
        )

        ## Aggregate
        df_summary_tmp = (
            df_pred
            >> tf_bind_cols(df[out] >> tf_filter(var_in(X.index, Is[i])))
            >> tf(**summaries_all)
        )

        ## Tag the summary row(s) with the fold they belong to
        if var_fold is None:
            df_summary_tmp = df_summary_tmp >> tf_mutate(_kfold=i)
        else:
            df_summary_tmp[var_fold] = levels[i]

        df_res = concat((df_res, df_summary_tmp), axis=0).reset_index(drop=True)

    return df_res
def write_input_output(cfg, model, burst, aligned, denoised, filters, motion):
    """
    Write diagnostic plots and example images for the current training step
    below ./output/n2sim/io_examples/<cfg.exp_name>/ (created if missing).
    At most the first 4 batch elements are written as images.

    :params cfg: config object; reads `exp_name` and `global_step`
    :params model: reads `use_unet_only`, `use_alignment`, `denoiser_info.model`
        and `align_info.model`
    :params burst: input images to the model, :shape [B, N, C, H, W]
    :params aligned: output images from the alignment layers, :shape [B, N, C, H, W]
    :params denoised: output images from the denoiser, :shape [B, N, C, H, W]
    :params filters: filters used by model, :shape [B, L, N, K2, 1, Hf, Wf] with Hf = (H or 1) for L = number of cascaded filters
    :params motion: indexed per batch element; an arrow image is written only
        when motion[b] has more than one entry and more than one dimension
    """

    # -- file path --
    path = Path(f"./output/n2sim/io_examples/{cfg.exp_name}/")
    if not path.exists():
        path.mkdir(parents=True)

    # -- init --
    B, N, C, H, W = burst.shape

    # -- save histogram of residuals --
    denoised_np = denoised.detach().cpu().numpy()
    plot_histogram_residuals_batch(denoised_np, cfg.global_step, path, rand_name=False)

    # -- save histogram of gradients (denoiser) --
    if not model.use_unet_only:
        denoiser = model.denoiser_info.model
        plot_histogram_gradients(denoiser, "denoiser", cfg.global_step, path, rand_name=False)

    # -- save histogram of gradients (alignment) --
    if model.use_alignment:
        alignment = model.align_info.model
        plot_histogram_gradients(alignment, "alignment", cfg.global_step, path, rand_name=False)

    # -- save gradient norm by layer (denoiser) --
    if not model.use_unet_only:
        denoiser = model.denoiser_info.model
        plot_histogram_gradient_norms(denoiser, "denoiser", cfg.global_step, path, rand_name=False)

    # -- save gradient norm by layer (alignment) --
    if model.use_alignment:
        alignment = model.align_info.model
        plot_histogram_gradient_norms(alignment, "alignment", cfg.global_step, path, rand_name=False)

    # -- cap the number of batch elements written as images --
    if B > 4:
        B = 4
    for b in range(B):

        # -- save dirty & clean & res triplet --
        # middle burst frame, mean of the denoised frames, and their difference
        fn = path / Path(f"image_{cfg.global_step}_{b}.png")
        res = burst[b][N // 2] - denoised[b].mean(0)
        imgs = torch.stack([burst[b][N // 2], denoised[b].mean(0), res], dim=0)
        tv_utils.save_image(imgs, fn, nrow=3, normalize=True, range=(-0.5, 0.5))

        # -- save images --
        # each row: (middle - first frame), all frames, (middle - last frame)
        fn = path / Path(f"{cfg.global_step}_{b}.png")
        burst_b = torch.cat([
            burst[b][[N // 2]] - burst[b][[0]], burst[b],
            burst[b][[N // 2]] - burst[b][[-1]]
        ], dim=0)
        aligned_b = torch.cat([
            aligned[b][[N // 2]] - aligned[b][[0]], aligned[b],
            aligned[b][[N // 2]] - aligned[b][[-1]]
        ], dim=0)
        denoised_b = torch.cat([
            denoised[b][[N // 2]] - denoised[b][[0]], denoised[b],
            denoised[b][[N // 2]] - denoised[b][[-1]]
        ], dim=0)
        # NOTE(review): each part carries N + 2 frames, so this looks like
        # 3(N+2),C,H,W rather than 2N,C,H,W -- confirm.
        imgs = torch.cat([burst_b, aligned_b, denoised_b], dim=0)  # 2N,C,H,W
        tv_utils.save_image(imgs, fn, nrow=N + 2, normalize=True, range=(-0.5, 0.5))

        # -- save filters --
        fn = path / Path(f"filters_{cfg.global_step}_{b}.png")
        K = int(np.sqrt(filters.shape[3]))
        L = filters.shape[1]
        if filters.shape[-1] > 1:
            # up to 10 random positions, the same index list on both last axes
            # NOTE(review): the view below assumes exactly 10 positions, i.e.
            # filters.shape[-1] >= 10 -- confirm.
            S = npr.permutation(filters.shape[-1])[:10]
            filters_b = filters[b, ..., 0, S, S].view(N * 10 * L, 1, K, K)
        else:
            filters_b = filters[b, ..., 0, 0, 0].view(N * L, 1, K, K)
        tv_utils.save_image(filters_b, fn, nrow=N, normalize=True)

        # -- save direction image --
        fn = path / Path(f"arrows_{cfg.global_step}_{b}.png")
        if len(motion[b]) > 1 and len(motion[b].shape) > 1:
            arrows = create_arrow_image(motion[b], pad=2)
            tv_utils.save_image([arrows], fn)

    print(f"Wrote example images to file at [{path}]")
    plt.close("all")
def permute_for_monte_carlo(dist_matrix):
    """Return a copy of ``dist_matrix`` with rows and columns shuffled alike.

    One random permutation is drawn and applied first to the rows and then to
    the columns, so entry ``(i, j)`` of the result is entry ``(p[i], p[j])``
    of the input.
    """
    order = permutation(len(dist_matrix))
    reordered_rows = dist_matrix[order]
    return reordered_rows[:, order]
def main():
    """Train the latent SDE model and periodically save diagnostic figures.

    Relies on the module-level names ``args`` and ``device`` (and ``ckpt_dir``
    when checkpoints are saved). Every ``args.pause_iters`` steps a figure of
    the current posterior samples is written to ``args.train_dir``.
    """
    # Dataset.
    ts_, ts_ext_, ts_vis_, ts, ts_ext, ts_vis, ys, ys_ = make_data()

    # Plotting parameters.
    vis_batch_size = 1024
    ylims = (-1.75, 1.75)
    alphas = [0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50, 0.55]
    percentiles = [0.999, 0.99, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]
    # Random order of the visualization batch; its first entries pick which
    # sample paths are drawn as individual lines.
    vis_idx = npr.permutation(vis_batch_size)
    # From https://colorbrewer2.org/.
    if args.color == "blue":
        sample_colors = ('#8c96c6', '#8c6bb1', '#810f7c')
        fill_color = '#9ebcda'
        mean_color = '#4d004b'
        num_samples = len(sample_colors)
    else:
        sample_colors = ('#fc4e2a', '#e31a1c', '#bd0026')
        fill_color = '#fd8d3c'
        mean_color = '#800026'
        num_samples = len(sample_colors)

    # Fix seed for the random draws used in the plots.
    # (eps and bm are created once and reused for every figure.)
    eps = torch.randn(vis_batch_size, 1).to(device)
    bm = BrownianPath(t0=ts_vis[0], w0=torch.zeros(vis_batch_size, 1).to(device))

    # Model.
    model = LatentSDE().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=.999)
    kl_scheduler = utils.LinearScheduler(iters=args.kl_anneal_iters)

    logp_metric = utils.EMAMetric()
    log_ratio_metric = utils.EMAMetric()
    loss_metric = utils.EMAMetric()

    if args.show_prior:
        with torch.no_grad():
            zs = model.sample_p(ts=ts_vis, batch_size=vis_batch_size, eps=eps, bm=bm).squeeze()
            ts_vis_, zs_ = ts_vis.cpu().numpy(), zs.cpu().numpy()
            # Sort across the batch axis so columns can be read as quantiles.
            zs_ = np.sort(zs_, axis=1)

            # NOTE: despite its name, img_dir is the path of the figure file.
            img_dir = os.path.join(args.train_dir, 'prior.png')
            plt.subplot(frameon=False)
            for alpha, percentile in zip(alphas, percentiles):
                idx = int((1 - percentile) / 2. * vis_batch_size)
                # NOTE(review): for percentile 0.999 idx evaluates to 0, so
                # -idx selects column 0 as well and the band is empty.
                zs_bot_ = zs_[:, idx]
                zs_top_ = zs_[:, -idx]
                plt.fill_between(ts_vis_, zs_bot_, zs_top_, alpha=alpha, color=fill_color)

            # `zorder` determines who's on top; the larger the more at the top.
            plt.scatter(ts_, ys_, marker='x', zorder=3, color='k', s=35)  # Data.
            plt.ylim(ylims)
            plt.xlabel('$t$')
            plt.ylabel('$Y_t$')
            plt.tight_layout()
            plt.savefig(img_dir, dpi=args.dpi)
            plt.close()
            logging.info(f'Saved prior figure at: {img_dir}')

    for global_step in tqdm.tqdm(range(args.train_iters)):
        # Plot and save.
        if global_step % args.pause_iters == 0:
            img_path = os.path.join(args.train_dir, f'global_step_{global_step}.png')

            with torch.no_grad():
                zs = model.sample_q(ts=ts_vis, batch_size=vis_batch_size, eps=eps, bm=bm).squeeze()
                # Pick the sample paths before zs_ is sorted below.
                samples = zs[:, vis_idx]
                ts_vis_, zs_, samples_ = ts_vis.cpu().numpy(), zs.cpu().numpy(
                ), samples.cpu().numpy()
                zs_ = np.sort(zs_, axis=1)
                plt.subplot(frameon=False)

                if args.show_percentiles:
                    for alpha, percentile in zip(alphas, percentiles):
                        idx = int((1 - percentile) / 2. * vis_batch_size)
                        zs_bot_, zs_top_ = zs_[:, idx], zs_[:, -idx]
                        plt.fill_between(ts_vis_, zs_bot_, zs_top_, alpha=alpha, color=fill_color)

                if args.show_mean:
                    plt.plot(ts_vis_, zs_.mean(axis=1), color=mean_color)

                if args.show_samples:
                    for j in range(num_samples):
                        plt.plot(ts_vis_, samples_[:, j], color=sample_colors[j], linewidth=1.0)

                if args.show_arrows:
                    # Evaluate model.f on a num x num grid of (t, y) points
                    # and draw the result as a quiver plot.
                    num, dt = 12, 0.12
                    t, y = torch.meshgrid([
                        torch.linspace(0.2, 1.8, num).to(device),
                        torch.linspace(-1.5, 1.5, num).to(device)
                    ])
                    t, y = t.reshape(-1, 1), y.reshape(-1, 1)
                    fty = model.f(t=t, y=y).reshape(num, num)
                    dt = torch.zeros(num, num).fill_(dt).to(device)
                    dy = fty * dt
                    dt_, dy_, t_, y_ = dt.cpu().numpy(), dy.cpu().numpy(
                    ), t.cpu().numpy(), y.cpu().numpy()
                    plt.quiver(t_, y_, dt_, dy_, alpha=0.3, edgecolors='k', width=0.0035, scale=50)

                if args.hide_ticks:
                    plt.xticks([], [])
                    plt.yticks([], [])

                plt.scatter(ts_, ys_, marker='x', zorder=3, color='k', s=35)  # Data.
                plt.ylim(ylims)
                plt.xlabel('$t$')
                plt.ylabel('$Y_t$')
                plt.tight_layout()
                plt.savefig(img_path, dpi=args.dpi)
                plt.close()
                logging.info(f'Saved figure at: {img_path}')

                if args.save_ckpt:
                    # ckpt_dir is not defined in this function; it has to
                    # exist at module level.
                    torch.save({'model': model.state_dict()},
                               os.path.join(ckpt_dir, f'global_step_{global_step}.ckpt'))

        # Train.
        optimizer.zero_grad()
        zs, log_ratio = model(ts=ts_ext, batch_size=args.batch_size)
        zs = zs.squeeze()
        zs = zs[
            1:
            -1]  # Drop first and last which are only used to penalize out-of-data region and spread uncertainty.

        likelihood = {
            "laplace": Laplace(loc=zs, scale=args.scale),
            "normal": Normal(loc=zs, scale=args.scale)
        }[args.likelihood]
        logp = likelihood.log_prob(ys).sum(dim=0).mean(dim=0)

        # Negative log-likelihood plus the log-ratio term weighted by the
        # current value of the KL annealing schedule.
        loss = -logp + log_ratio * kl_scheduler()
        loss.backward()
        optimizer.step()
        scheduler.step()
        kl_scheduler.step()

        logp_metric.step(logp)
        log_ratio_metric.step(log_ratio)
        loss_metric.step(loss)

        logging.info(
            f'global_step: {global_step}, '
            f'logp: {logp_metric.val():.3f}, log_ratio: {log_ratio_metric.val():.3f}, loss: {loss_metric.val():.3f}'
        )
def generate_sr_instance(players):
    """Build random preference lists for every player.

    Args:
        players: Iterable of hashable players; duplicates are collapsed.

    Returns:
        dict: Maps each player to a list holding all *other* players in a
        random order.
    """
    roster = set(players)
    preferences = {}
    for player in roster:
        # Everybody except the player, shuffled.
        others = list(roster - {player})
        preferences[player] = list(permutation(others))
    return preferences
lebron_normalized = nba_normalized[nba["player"] == "LeBron James"]

# Find the distance between Lebron James and everyone else.
euclidean_distances = nba_normalized.apply(lambda row: distance.euclidean(row, lebron_normalized), axis=1)
distance_frame = pandas.DataFrame(data={"dist": euclidean_distances, "idx": euclidean_distances.index})
distance_frame.sort_values("dist", inplace=True)
# Position 0 after sorting is the smallest distance (LeBron compared with
# himself), so position 1 is the most similar other player.
second_smallest = distance_frame.iloc[1]["idx"]
most_similar_to_lebron = nba.loc[int(second_smallest)]["player"]


## 6. Generating Training and Testing Sets ##

import random
from numpy.random import permutation

# Randomly shuffle the index of nba
random_indices = permutation(nba.index)
# Set a cutoff for how many items we want in the test set (in this case 1/3 of the items)
test_cutoff = math.floor(len(nba)/3)
# Generate the test set by taking the first 1/3 of the randomly shuffled indices.
# The slice starts at 0: starting at 1 left the first shuffled row out of
# both the test set and the train set.
test = nba.loc[random_indices[:test_cutoff]]
# Generate the train set with the rest of the data
train = nba.loc[random_indices[test_cutoff:]]


## 7. Using sklearn ##

# The columns that we'll be using to make predictions
x_columns = ['age', 'g', 'gs', 'mp', 'fg', 'fga', 'fg.', 'x3p', 'x3pa', 'x3p.', 'x2p', 'x2pa', 'x2p.', 'efg.', 'ft', 'fta', 'ft.', 'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf']
# The column we want to predict
y_column = ["pts"]

from sklearn.neighbors import KNeighborsRegressor
def _permute_strengths_within_degree_classes(out_strength, in_strength,
                                             out_degree, in_degree):
    """Shuffle node strengths, in place, among nodes sharing a degree value."""
    from numpy import array, unique, where
    from numpy.random import permutation

    out_in_sequence = array(list(zip(out_degree, in_degree)))
    # NOTE(review): `unique` without `axis=0` yields scalar degree values, so
    # only nodes whose out- and in-degree both equal k form a class.  If the
    # intent was one class per distinct (out, in) pair this needs
    # `unique(out_in_sequence, axis=0)` -- confirm before changing.
    for k in unique(out_in_sequence):
        k_ind = where((out_in_sequence == k).all(axis=1))[0]
        new_ind = permutation(k_ind)
        out_strength[k_ind] = out_strength[new_ind]
        in_strength[k_ind] = in_strength[new_ind]


def preserve_strength(g, randomize_topology=False, preserve_mode='estimate_both',
                      permute_strength=True, randomize_method='vl'):
    """Build a weighted null model of ``g`` that preserves node strengths.

    Args:
        g: Either a weight matrix (NumPy array or SciPy CSC matrix, rows are
            sources) or a graph object offering ``degree``, ``strength``,
            ``incident``, ``es`` ... (presumably an igraph ``Graph`` -- the
            method names match; confirm against callers).
        randomize_topology: If true, also randomize the edge structure while
            keeping the degree sequence (graph input only).
        preserve_mode: ``'estimate_both'``/``3`` assigns each edge a weight
            proportional to ``out_strength[source] * in_strength[target] /
            (out_degree[source] * in_degree[target])``; ``'out'``/``1`` and
            ``'in'``/``2`` shuffle the weights among each node's incident
            edges for that mode.
        permute_strength: In ``'estimate_both'`` mode, shuffle strengths among
            nodes of the same degree class first.
        randomize_method: ``method`` passed to ``Degree_Sequence``.

    Returns:
        A dense matrix of estimated weights for matrix input in
        ``'estimate_both'`` mode without topology randomization; otherwise a
        graph copy with new ``'weight'`` edge attributes.
    """
    from numpy import array, logical_not, mean, ndarray, outer
    from numpy import sum as np_sum
    from scipy.sparse import csc_matrix

    if (isinstance(g, (ndarray, csc_matrix))
            and preserve_mode in ['estimate_both', 3]
            and not randomize_topology):
        A = g
        out_strength = np_sum(A, axis=1)
        in_strength = np_sum(A, axis=0)
        adj = 1 * (A > 0)
        out_degree = np_sum(adj, axis=1)
        in_degree = np_sum(adj, axis=0)

        if permute_strength:
            _permute_strengths_within_degree_classes(
                out_strength, in_strength, out_degree, in_degree)

        mean_k = mean([out_degree, in_degree])
        mean_s = mean([out_strength, in_strength])
        G = (mean_k / mean_s) * outer(out_strength, in_strength) / outer(
            out_degree, in_degree)
        # Only existing edges keep a weight.
        G[logical_not(adj)] = 0
        return G

    out_degree = g.degree(mode=1)
    in_degree = g.degree(mode=2)

    if randomize_topology:
        if g.is_directed():
            # Check if all edges are bidirectional.
            # If so, create a random graph with only bidirectional edges.
            G = g.copy()
            G.to_undirected(mode=False)
            if all(array(G.count_multiple()) == 2):
                G = g.Degree_Sequence(out_degree, method=randomize_method)
                G.to_directed()
            else:
                G = g.copy()
                G.rewire()
        else:
            G = g.Degree_Sequence(out_degree, method=randomize_method)
    else:
        G = g.copy()

    if preserve_mode in ['estimate_both', 3]:
        out_strength = array(g.strength(mode=1, weights='weight'))
        in_strength = array(g.strength(mode=2, weights='weight'))

        if permute_strength:
            _permute_strengths_within_degree_classes(
                out_strength, in_strength, out_degree, in_degree)

        mean_k = mean([out_degree, in_degree])
        mean_s = mean([out_strength, in_strength])

        for e in G.es:
            e["weight"] = ((mean_k / mean_s) * out_strength[e.source] *
                           in_strength[e.target] /
                           (out_degree[e.source] * in_degree[e.target]))
        return G
    elif preserve_mode in ['out', 1]:
        preserve_mode = 1
    elif preserve_mode in ['in', 2]:
        preserve_mode = 2

    from numpy.random import shuffle

    # Edge ids incident to every vertex, one list per vertex.
    ind = [g.incident(v, mode=preserve_mode) for v in range(g.vcount())]
    # Flatten explicitly: summing the per-vertex lists with numpy only
    # concatenates them when the lists are ragged, and fails or adds
    # element-wise otherwise.
    weights = g.es[[e for incident in ind for e in incident]]['weight']
    # Shuffle eagerly; a bare `map(shuffle, ind)` is lazy on Python 3 and
    # never performs the shuffle.
    for incident in ind:
        shuffle(incident)
    # NOTE(review): edge ids come from `g` but are applied to `G`; with
    # randomize_topology=True the two edge lists may not correspond.
    G.es[[e for incident in ind for e in incident]]['weight'] = weights
    return G
def optimize(cfg, data):
    """Train a one-hidden-layer leaky-ReLU network by per-sample SGD (TF1).

    The hidden layer has ``2 * k`` units with trainable weights ``w0``; the
    output weights are fixed to ``+vi`` for the first ``k`` units and ``-vi``
    for the rest.  The network is trained with hinge loss, one training
    sample per step, until ``cfg['training_epochs']`` epochs have run or the
    train accuracy reaches ``cfg['break_thresh']``.  Results are saved
    periodically with ``np.savez`` under ``cfg['res_path']``.

    Args:
        cfg (dict): Reads the keys ``k``, ``alpha``, ``learning_rate``,
            ``beta``, ``batch_size``, ``break_thresh``, ``training_epochs``,
            ``name``, ``save_step``, ``print_step``,
            ``train_acc_checkpoints``, ``res_path`` and ``save_sbs``.
        data (dict): Reads ``x_train``, ``y_train``, ``x_test``, ``y_test``;
            ``x_*`` are indexed as 2-D (samples, features).
            NOTE(review): accuracy compares sign(pred) with sign(y - .5), so
            labels are presumably 0/1 -- confirm.

    Returns:
        None.
    """
    #################################
    ''' configurations '''
    #################################
    print('\n\n*** Configuring')
    # choosing GPU device
    #os.environ['CUDA_VISIBLE_DEVICES'] = '0, 1'
    session_config = tf.ConfigProto()
    session_config.gpu_options.visible_device_list = '0'  # which GPUs to use

    print('\nConfigs:\n', cfg)
    # network
    k = cfg['k']
    alpha = cfg['alpha']
    # optimization
    learning_rate = cfg['learning_rate']
    beta = cfg['beta']
    batch_size = cfg['batch_size']
    break_thresh = cfg['break_thresh']
    training_epochs = cfg['training_epochs']
    # meta
    cfg_name = cfg['name']
    save_step = cfg['save_step']
    print_step = cfg['print_step']
    train_acc_checkpoints = cfg['train_acc_checkpoints']
    # paths
    data_path = '../data/'
    res_path = cfg['res_path']
    logs_path = res_path + 'logs/' + cfg_name + '/'
    data_name = res_path + cfg_name + '_data'
    res_name = res_path + cfg_name + '_res'
    ckpt_name = res_path + cfg_name + '_model'
    #model_name = 'C:/Users/Shira/Documents/TF/model_1_' + cfg_name + '.ckpt'

    # Persist the exact data used for this run next to the results.
    np.save(data_name, data)

    print('\n*** Preparing data\n')

    x_train = data['x_train']
    y_train = data['y_train']
    x_test = data['x_test']
    y_test = data['y_test']

    n_train = x_train.shape[0]
    dim = x_train.shape[1]
    n_test = x_test.shape[0]
    n_all = n_train + n_test
    n_classes = 2

    if (n_train % batch_size != 0):
        print("\n*** Warning! batch size doesn't divide n_train *** \n")
        # Blocks until the user confirms on stdin.
        input("Press enter to continue")

    total_batch = int(n_train / batch_size)

    # Magnitude of the fixed output weights / norm of each initial hidden
    # weight column.
    vi = 1 / np.sqrt(2 * k)
    w_max = 1 / np.sqrt(2 * k)

    print('\n*** Configured according to', cfg_name)

    # array for holding the accuracy results
    n_max_epochs = training_epochs + 1
    avg_costs = np.zeros(n_max_epochs)
    G_w = np.zeros(n_max_epochs)
    F_w = np.zeros(n_max_epochs)
    nzs_per_epoch = []
    iszero_list = []
    train_acc_list = []
    test_acc_list = []
    w_learned_list = []

    print('\n*** Building Computation Graph')

    # tf Graph Input
    x = tf.placeholder(tf.float32, [None, dim], name='InputData')
    y = tf.placeholder(tf.float32, [None], name='LabelData')

    weights = {}
    # Fixed (non-trainable) output layer: +vi for the first k units, -vi after.
    weights_out = tf.concat((vi * tf.ones(k), -vi * tf.ones(k)), axis=0)
    # Gaussian init; every column is normalized to unit norm and then scaled
    # to norm w_max.
    weights_init = rn.normal(0, 1, [dim, 2 * k])
    for i in range(2 * k):
        weights_init[:, i] /= np.sqrt(np.sum(weights_init[:, i]**2))
    weights_init *= w_max

    def degenerate_multilayer_perceptron(x, weights, alpha):
        # Registers the trainable hidden weights in `weights` under 'w0' and
        # returns the network output for input `x`.
        layer = x
        n_curr = dim
        w_name = 'w0'
        weights[w_name] = tf.Variable(tf.cast(weights_init, tf.float32), name=w_name)
        layer = tf.matmul(layer, weights[w_name])
        layer = leaky_relu(layer, alpha)
        # Output layer
        out_layer = tf.tensordot(layer, weights_out, axes=1)
        return out_layer

    def leaky_relu(x, alpha):
        # x for x >= 0 and alpha * x for x < 0.
        return tf.nn.relu(x) - alpha * tf.nn.relu(-x)

    # Encapsulating all ops into scopes, making Tensorboard's Graph
    # Visualization more convenient
    with tf.name_scope('Model'):
        # Build model
        pred = degenerate_multilayer_perceptron(x, weights, alpha)
    with tf.name_scope('Loss'):
        loss = tf.losses.hinge_loss(labels=y, logits=pred)
    with tf.name_scope('SGD'):
        # Gradient Descent
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        # Op to calculate every variable gradient
        grads = tf.gradients(loss, tf.trainable_variables())
        grads_and_vars = list(zip(grads, tf.trainable_variables()))
        # Op to update all variables according to their gradient
        apply_grads = optimizer.apply_gradients(grads_and_vars=grads_and_vars)
    with tf.name_scope('Accuracy'):
        # Accuracy
        acc = tf.equal(tf.sign(pred), tf.sign(y - .5))
        acc = tf.reduce_mean(tf.cast(acc, tf.float32))

    # Initialize the variables (i.e. assign their default value)
    init = tf.global_variables_initializer()
    #saver = tf.train.Saver([weights['w0']])

    # Start training
    with tf.Session(config=session_config) as sess:
        print('\n*** Training')
        # Run the initializer
        sess.run(init)
        epoch = 0
        # Training cycle
        for epoch in range(training_epochs):
            avg_cost = 0.
            # Visit every training sample once per epoch, in random order.
            epoch_perm = rn.permutation(n_train)
            nzs_per_epoch.append([0])
            first_step_in_epoch = True
            for ind in epoch_perm:
                #save_path = saver.save(sess, ckpt_name)
                # Record accuracies and weights before the update: at every
                # step when cfg['save_sbs'] is set, otherwise only at the
                # first step of each epoch.
                if cfg['save_sbs'] or first_step_in_epoch:  #and epoch < lim_save_sbs
                    train_acc = acc.eval({x: x_train, y: y_train})
                    test_acc = acc.eval({x: x_test, y: y_test})
                    train_acc_list.append(train_acc)
                    test_acc_list.append(test_acc)
                    w_learned_list.append(np.array(weights['w0'].eval()).T)
                # A "batch" here is the single sample `ind`.
                batch_xs, batch_ys = x_train[ind, :].reshape(
                    -1, dim), y_train[ind].reshape(1)
                # Run optimization op (backprop), cost op (to get loss value)
                _, loss_on_batch = sess.run([apply_grads, loss],
                                            feed_dict={
                                                x: batch_xs,
                                                y: batch_ys
                                            })
                # Compute average loss
                # NOTE(review): the sum runs over n_train single-sample steps
                # but is divided by total_batch = n_train / batch_size; this
                # is a mean only for batch_size == 1 -- confirm.
                avg_cost += loss_on_batch / total_batch
                # Count the steps of this epoch whose loss was non-zero.
                zs = np.int8(loss_on_batch == 0)
                nzs_per_epoch[-1] += 1 - zs
                first_step_in_epoch = False

            avg_costs[epoch] = avg_cost

            # Report the highest still-pending accuracy checkpoint that was
            # passed, then drop it and all lower checkpoints.
            ind_cp = len(train_acc_checkpoints) - 1
            while ind_cp >= 0:
                if (train_acc > train_acc_checkpoints[ind_cp]):
                    print(
                        '\n\n!!! checkpoint: {:.3f}\n epoch: {}\n train acc: {:.3f}\n test acc: {:.3f}\n'
                        .format(train_acc_checkpoints[ind_cp], epoch,
                                train_acc, test_acc))
                    train_acc_checkpoints = train_acc_checkpoints[ind_cp + 1:]
                    break
                else:
                    ind_cp -= 1

            stopping = (train_acc >= break_thresh)

            # Make the detailed printing ten times sparser once the epoch
            # count reaches 100 print steps.
            if (epoch / print_step == 100):
                print_step *= 10
                print(
                    '\nprint step grows by a factor of 10 and is now equal to',
                    print_step)

            if (epoch % print_step == 0) or (epoch < 10) or stopping:
                print('\n\nEpoch: {}'.format(epoch))
                print(
                    'Before training on epoch, train Accuracy: {:.3f}'.format(
                        train_acc))
                print('Test Accuracy: {:.3f}'.format(test_acc))
                print('While training, cost =', '{:.9f}'.format(avg_cost),
                      '({:.3f})'.format(np.exp(-avg_cost)))
                print('Number of non-zero steps (all): ', nzs_per_epoch[-1])
            else:
                print('{}, '.format(epoch), end='')

            if (epoch % save_step == 0) or stopping:
                print('\n*** Saving')
                ind_try = 0
                # Retry until the save succeeds; a PermissionError triggers
                # another attempt, Ctrl-C aborts saving and stops training.
                while True:
                    ind_try += 1
                    try:
                        np.savez(res_name,
                                 avg_costs=avg_costs[:epoch + 1],
                                 x_train=x_train,
                                 y_train=y_train,
                                 nzs_per_epoch=nzs_per_epoch,
                                 train_acc_list=train_acc_list,
                                 test_acc_list=test_acc_list,
                                 w_learned_list=w_learned_list,
                                 config=cfg)
                        break
                    except PermissionError:
                        print('\n#' * 20, end='')
                        print(
                            '\n<<< Saving attempt {} failed, trying again >>> \n'
                            .format(ind_try))
                    except KeyboardInterrupt:
                        print('\n<<< Simulation interrupted, cannot save')
                        stopping = True
                        break

            if stopping:
                print(
                    '\n*** Epoch: {}\n Training reached {} threshold and is stopping'
                    .format(epoch, break_thresh))
                break

        # print('\n*** Saving model')
        # saver.save(sess, model_name)
        # #saver_b.save(sess, biases_name)

        print('\n*** Optimization Finished!')
        print('\n*** Configured according to', cfg_name)
        print('Configs:\n', cfg)

        # Calculate accuracy
        print('*** Train Accuracy: {:.3f}'.format(
            acc.eval({
                x: x_train,
                y: y_train
            })))
        print('*** Accuracy: {:.3f}'.format(acc.eval({x: x_test, y: y_test})))

        print('\n*** Run the command line:' \
              '\n --> tensorboard --logdir=', logs_path, \
              '\n Then open http://0.0.0.0:6006/ into your web browser\n')

    return
import numpy as np
from numpy import random


def demo_random_sampling():
    """Print three small demonstrations of ``numpy.random``.

    1. A 5x5 array of primes drawn with replacement from a fixed, weighted
       distribution (``random.choice`` with ``p=``).
    2. An in-place shuffle of the integers 1..9 (``random.shuffle``).
    3. A shuffled copy of the integers 1..9 (``random.permutation``).

    The function only prints; it returns ``None``.
    """
    primes = [3, 5, 7, 11, 13]
    # Selection probabilities, one per prime above.
    weights = [0.1, 0.2, 0.2, 0.45, 0.05]
    rand1 = random.choice(primes, p=weights, size=(5, 5))
    print("\nRandom primes:\n", rand1)

    arr = np.arange(1, 10)
    # shuffle() reorders ``arr`` itself and returns None.
    random.shuffle(arr)
    print("\nRandom shuffle:\n", arr)

    # permutation() leaves its argument alone and returns a shuffled copy.
    print("\nRandom permutation:\n", random.permutation(np.arange(1, 10)))


if __name__ == '__main__':
    demo_random_sampling()
def swap_channels(self, img):
    """Return *img* with its last axis randomly reordered, or unchanged.

    A single draw of ``random.randint(2)`` decides whether a swap happens;
    when it is non-zero the last axis is indexed with
    ``random.permutation(3)``, otherwise *img* is returned as passed in.
    ``self`` is not used.
    """
    swap_requested = random.randint(2) != 0
    if not swap_requested:
        return img
    channel_order = random.permutation(3)
    return img[..., channel_order]
from itertools import permutations
from numpy.random import permutation
import numpy


def shuffle_line_pairs(src_path='sample100.txt',
                       dst_path="random_sample1000.txt"):
    """Write the two-line records of *src_path* to *dst_path* in random order.

    The input is read as consecutive pairs of lines (lines 0-1, 2-3, ...).
    The pairs are shuffled as units, so the two lines of a record stay
    adjacent and in their original order. Each output line has trailing
    whitespace removed and is terminated with a single newline.

    Compared with the earlier fixed-width character-array version:

    * both files are closed deterministically via ``with``;
    * the input is opened read-only instead of ``r+``;
    * lines longer than 1000 characters are no longer truncated;
    * a trailing unpaired line is ignored instead of raising ``IndexError``.

    Parameters
    ----------
    src_path : str
        Text file to read.
    dst_path : str
        Text file to create or overwrite.
    """
    with open(src_path, 'r') as src:
        lines = src.readlines()

    # Integer division drops a dangling last line when the count is odd.
    n_pairs = len(lines) // 2
    order = permutation(n_pairs)

    with open(dst_path, "w") as dst:
        for k in order:
            for text in (lines[2 * k], lines[2 * k + 1]):
                dst.write(text.rstrip() + '\n')


if __name__ == '__main__':
    shuffle_line_pairs()
def main():
    """Parse command-line options, load the dataset and train the model.

    Steps performed:

    1. Parse the CLI options.
    2. Seed NumPy and TensorFlow with ``--seed``.
    3. Create the ``--save`` directory if needed.
    4. Load the data, either from the pickle file given by ``--data`` or
       via ``bibsonomy.loadBibtex("data/bibtex")``.
    5. If ``--valSplit`` is positive, split the training rows at random into
       disjoint training and validation parts.
    6. Train ``Model``, evaluating on the validation part if one exists and
       on the test set otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--save', type=str, default='work/icnn')
    parser.add_argument('--nEpoch', type=int, default=100)
    parser.add_argument('--trainBatchSz', type=int, default=128)
    parser.add_argument('--layerSizes', type=int, nargs='+',
                        default=[600, 600])
    # parser.add_argument('--testBatchSz', type=int, default=2048)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--data', type=str)
    parser.add_argument('--valSplit', type=float, default=0)
    parser.add_argument('--noncvx', action='store_true')
    parser.add_argument('--inference_lr', type=float, default=0.01)
    parser.add_argument('--inference_momentum', type=float, default=0.5)
    parser.add_argument('--inference_nIter', type=int, default=10)
    args = parser.parse_args()

    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    save = os.path.expanduser(args.save)
    # exist_ok=True already tolerates an existing directory.
    os.makedirs(save, exist_ok=True)

    if args.data:
        print("Loading data from: ", args.data)
        # NOTE(security): unpickling can execute arbitrary code; only pass
        # trusted files via --data.
        with open(args.data, 'rb') as f:
            data = pickle.load(f)
    else:
        data = bibsonomy.loadBibtex("data/bibtex")

    nTest = data['testX'].shape[0]
    nFeatures = data['trainX'].shape[1]
    nLabels = data['trainY'].shape[1]
    nTrain_orig = data['trainX'].shape[0]
    nVal = int(args.valSplit * nTrain_orig)
    nTrain = nTrain_orig - nVal

    if args.valSplit > 0:
        I = npr.permutation(nTrain_orig)
        # Split at nTrain so the two index sets are disjoint and the
        # validation part has exactly nVal rows. The former I[nVal:] slice
        # overlapped the training indices and had nTrain rows.
        trainI, valI = I[:nTrain], I[nTrain:]
        trainX = data['trainX'][trainI, :]
        trainY = data['trainY'][trainI, :]
        valX = data['trainX'][valI, :]
        valY = data['trainY'][valI, :]
    else:
        trainX = data['trainX']
        trainY = data['trainY']

    print("\n\n" + "=" * 40)
    print("+ nTrain: {}, nTest: {}".format(nTrain, nTest))
    print("+ nFeatures: {}, nLabels: {}".format(nFeatures, nLabels))
    print("=" * 40 + "\n\n")

    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = Model(nFeatures, nLabels, args, sess)
        if args.valSplit > 0:
            model.train(args, trainX, trainY, valX, valY)
        else:
            model.train(args, trainX, trainY, data['testX'], data['testY'])
def shuffle(a):
    """Return the items of *a* rearranged in a random order.

    A random permutation of ``range(len(a))`` is drawn with
    ``rand.permutation`` and used to index *a*, so *a* must accept an
    integer index array (a NumPy array does).
    """
    return a[rand.permutation(len(a))]
# Helper function to plot images by index in the validation set: # In[ ]: def plots_idx(idx, titles=None): plots([image.load_img(path + 'valid/' + filenames[i]) for i in idx], titles=titles) # In[ ]: #1. A few correct labels at random correct = np.where(preds == val_labels[:, 1])[0] idx = permutation(correct)[:n_view] plots_idx(idx, probs[idx]) # In[ ]: #2. A few incorrect labels at random incorrect = np.where(preds != val_labels[:, 1])[0] idx = permutation(incorrect)[:n_view] plots_idx(idx, probs[idx]) # In[ ]: #3. The images we most confident were cats, and are actually cats correct_cats = np.where((preds == 0) & (preds == val_labels[:, 1]))[0] most_correct_cats = np.argsort(probs[correct_cats])[::-1][:n_view] plots_idx(correct_cats[most_correct_cats],
def _shuffle(self):
    """Draw a fresh random ordering of ``0 .. self._len - 1`` and rewind.

    Stores the new ordering in ``self._perm`` and resets the cursor
    ``self._cur`` to 0.
    """
    positions = np.arange(self._len)
    self._perm = npr.permutation(positions)
    self._cur = 0
def sample_df(df, pct=0.1, nr=100):
    """Return a random sample of rows from *df*.

    The number of rows requested is the larger of ``int(pct * len(df))``
    and ``int(nr)``. The index labels are shuffled with ``permutation`` and
    the first that-many labels are selected with ``df.loc``, so at most all
    labels are used when the request exceeds the frame size.
    """
    by_fraction = int(pct * df.shape[0])
    by_count = int(nr)
    n_rows = by_fraction if by_fraction >= by_count else by_count
    shuffled_labels = permutation(df.index)
    return df.loc[shuffled_labels[:n_rows], :]
def entropy(points, logp, N_entropy=10000, N_norm=2500):
    r"""
    Return entropy estimate and uncertainty from a random sample.

    *points* is a set of draws from an underlying distribution, as returned
    by a Markov chain Monte Carlo process for example.

    *logp* is the log-likelihood for each draw. It is not modified.

    *N_norm* is the number of points $k$ to use to estimate the posterior
    density normalization factor $P(D) = \hat N$, converting
    from $\log( P(D|M) P(M) )$ to $\log( P(D|M)P(M)/P(D) )$. The relative
    uncertainty $\Delta\hat S/\hat S$ scales with $\sqrt{k}$, with the
    default *N_norm=2500* corresponding to 2% relative uncertainty.
    Computation cost is $O(nk)$ where $n$ is number of points in the draw.

    *N_entropy* is the number of points used to estimate the entropy
    $\hat S = - \int P(M|D) \log P(M|D)$ from the normalized log likelihood
    values.

    Returns the pair *(S, dS)*: the entropy estimate and its uncertainty,
    each divided by *LN2* (i.e. expressed in bits).
    """

    # Use a random subset to estimate density.
    if N_norm >= len(logp):
        norm_points = points
    else:
        # The density subset is sized by N_norm. This slice previously used
        # N_entropy, so the N_norm argument had no effect on the subset.
        idx = permutation(len(points))[:N_norm]
        norm_points = points[idx]

    # Use a different subset to estimate the scale factor between density
    # and logp.
    if N_entropy >= len(logp):
        entropy_points, eval_logp = points, logp
    else:
        idx = permutation(len(points))[:N_entropy]
        entropy_points, eval_logp = points[idx], logp[idx]

    # Normalize p to a peak probability of 1 so that exp() doesn't underflow.
    #
    # This should be okay since for the normalizing constant C:
    #
    #      u' = e^(ln u + ln C) = e^(ln u)e^(ln C) = C u
    #
    # Using eq. 11 of Kramer with u' substituted for u:
    #
    #      N_est = < u'/p > = < C u/p > = C < u/p >
    #
    #      S_est = - < ln q >
    #            = - < ln (u'/N_est) >
    #            = - < ln C + ln u - ln (C <u/p>) >
    #            = - < ln u + ln C - ln C - ln <u/p> >
    #            = - < ln u - ln <u/p> >
    #            = - < ln u > + ln <u/p>
    #
    # Uncertainty comes from eq. 13:
    #
    #      N_err^2 = 1/(k-1) sum( (u'/p - <u'/p>)^2 )
    #              = 1/(k-1) sum( (C u/p - <C u/p>)^2 )
    #              = C^2 std(u/p)^2
    #      S_err = std(u'/p) / <u'/p> = (C std(u/p))/(C <u/p>) = std(u/p)/<u/p>
    #
    # So even though the constant C shows up in N_est, N_err, it cancels
    # again when S_est, S_err is formed.
    log_scale = max(eval_logp)
    # Rebind instead of subtracting in place: when no subsampling happened,
    # eval_logp is the caller's logp and "-=" would silently rescale it.
    eval_logp = eval_logp - log_scale

    # Compute entropy and uncertainty in nats
    rho = density(norm_points, entropy_points)
    frac = exp(eval_logp) / rho
    n_est, n_err = mean(frac), std(frac)
    s_est = log(n_est) - mean(eval_logp)
    s_err = n_err / n_est

    # return entropy and uncertainty in bits
    return s_est / LN2, s_err / LN2
def problem3(self, s):
    """Test LinkedList.__len__() and LinkedList.__str__(). 10 Points."""
    len_msg = "LinkedList.__len__() failed on list {}"
    str_msg = "LinkedList.__str__() failed"

    # LinkedList.__len__() (4 points) --------------------------------
    values = [int(v) for v in randint(1, 60, randint(5, 10))]
    linked = s.LinkedList()

    # Empty list
    points = self._eqTest(0, len(linked), len_msg.format(values[:0]))

    # Single item, then two items
    for size in (1, 2):
        linked.append(values[size - 1])
        points += self._eqTest(size, len(linked),
                               len_msg.format(values[:size]))

    # Many items
    for v in values[2:]:
        linked.append(v)
    points += self._eqTest(len(values), len(linked), len_msg.format(values))

    # LinkedList.__str__() (6 points) --------------------------------
    values = [int(v) for v in randint(1, 60, randint(5, 10))]
    linked = s.LinkedList()

    # Empty list
    points += self._strTest(values[:0], linked, str_msg)

    # Single item (int), then two items (int)
    for size in (1, 2):
        linked.append(values[size - 1])
        points += self._strTest(values[:size], linked, str_msg)

    # Many items (int)
    for v in values[2:]:
        linked.append(v)
    points += self._strTest(values, linked, str_msg)

    # Single item (str)
    values = [str(v) for v in permutation(["a", "b", "c", "d", "e", "f"])]
    linked = s.LinkedList()
    linked.append(values[0])
    points += self._strTest(values[:1], linked, str_msg)

    # Many items (str)
    for v in values[1:]:
        linked.append(v)
    points += self._strTest(values, linked, str_msg)

    return points
def fit_js(data, log_p, max_epochs=20):
    """
    Fit isotropic Gaussian by minimizing Jensen-Shannon divergence.

    *data* is indexed as ``data[:, n]`` per sample, so its first axis is the
    dimensionality and its second axis the sample count. *log_p* is called
    on symbolic matrices to build the objective. Training runs minibatch SGD
    with momentum for up to *max_epochs* epochs, halving the learning rate
    after each epoch; a ``KeyboardInterrupt`` stops training early.

    Returns ``(a * eye(D), b)`` built from the learned scale ``a`` and
    offset ``b``.
    """
    # data dimensionality and number of samples
    n_dims = data.shape[0]
    n_samples = data.shape[1]

    # symbolic data and hidden states
    sym_x = tt.dmatrix('X')
    sym_z = tt.dmatrix('Z')

    nr.seed(int(time() * 1000.) % 4294967295)
    subset = nr.permutation(n_samples)[:100]

    # initialize parameters from a random subset of at most 100 samples
    initial_offset = np.mean(data[:, subset], 1)[:, None]
    b = th.shared(initial_offset, broadcastable=(False, True))
    a = th.shared(np.std(data[:, subset] - b.get_value()))

    def log_q(x):
        """Model log-density of the isotropic Gaussian."""
        return (-0.5 * tt.sum(tt.square((x - b) / a), 0)
                - n_dims * tt.log(tt.abs_(a))
                - n_dims / 2. * np.log(np.pi))

    def generate(z):
        """Map standard-normal noise to model samples."""
        return a * z + b

    # Jensen-Shannon divergence
    jsd = tt.mean(tt.log(tt.nnet.sigmoid(log_p(sym_x) - log_q(sym_x)))) \
        + tt.mean(tt.log(tt.nnet.sigmoid(
            log_q(generate(sym_z)) - log_p(generate(sym_z)))))
    jsd = (jsd + np.log(4.)) / 2.

    # function computing JSD and its gradient
    f_jsd = th.function(
        [sym_z, sym_x], [jsd, th.grad(jsd, a), th.grad(jsd, b)])

    # SGD hyperparameters
    batch_size = 200
    momentum = 0.8
    lr = .5
    step_a = 0.
    step_b = 0.

    try:
        # display initial JSD
        initial = float(f_jsd(nr.randn(*data.shape), data)[0])
        print('{0:>4} {1:.4f}'.format(0, initial))

        for epoch in range(max_epochs):
            values = []

            # stochastic gradient descent
            for start in range(0, n_samples, batch_size):
                noise = nr.randn(n_dims, batch_size)
                batch = data[:, start:start + batch_size]

                value, grad_a, grad_b = f_jsd(noise, batch)
                step_a = momentum * step_a - lr * grad_a
                step_b = momentum * step_b - lr * grad_b

                values.append(value)

                a.set_value(a.get_value() + step_a)
                b.set_value(b.get_value() + step_b)

            # reduce learning rate
            lr /= 2.

            # display estimated JSD
            print('{0:>4} {1:.4f}'.format(epoch + 1, np.mean(values)))

    except KeyboardInterrupt:
        pass

    return a.get_value() * np.eye(n_dims), b.get_value()
# y = class3 인 학습데이터 생성 # 데이터 수 dataNumber_y3 = Y_class3 # 데이터가 평균 mu_y3 = [10, 10, 10, 10, 10] # 데이터 분산된 정도 variance_y3 = 4 # 난수 생성 data_y3 = multivariate_normal(mu_y3, np.eye(5) * variance_y3, dataNumber_y3) df_y3 = pd.DataFrame(data_y3, columns=['x1', 'x2', 'x3', 'x4', 'x5']) df_y3['y'] = 'class3' # 생성한 데이터를 하나의 DataFrame 으로 합치기 df = pd.concat([df_y1, df_y2, df_y3], ignore_index=True) # 순서에 상관없이 데이터 정렬 df_totalTrainData = df.reindex(permutation(df.index)).reset_index(drop=True) # 학습 데이터 확인 print("===== Data =====>") print(df_totalTrainData.head()) print(df_totalTrainData.tail()) # 학습데이터 shape 확인 print("df_totalTrainData Shape : {}\n".format(df_totalTrainData.shape)) # 학습데이터 전체 그래프 확인 sns.pairplot(df_totalTrainData, hue="y", height=2) plt.show() ### (2) 범주형 데이터 y컬럼 데이터 맴핑 선언 # y 컬럼 문자열 데이터를 리스트 형태로 변환 y_mapping = {
# NOTE(review): this `return wrapper` closes a definition that begins before
# this span (presumably a decorator); its original indentation is not visible.
return wrapper


class Solution:
    """Container for the insertion-sort routine below."""

    def insertSort(self, ls: List[int], replace=False) -> List[int]:
        """Sort *ls* in ascending order with insertion sort and return it.

        When *replace* is false (the default) a copy made with ``ls.copy()``
        is sorted and returned, leaving *ls* untouched; otherwise *ls* itself
        is sorted in place and returned. Sequences of length 0 or 1 are
        returned immediately.
        """
        if replace == False:
            nums = ls.copy()
        else:
            nums = ls
        Len = len(nums)
        if Len <= 1:
            return nums
        for i in range(1, Len):
            key = nums[i]
            # Walk left through the sorted prefix, moving `key` one slot at
            # a time past every larger element.
            for j in range(i - 1, -1, -1):
                if nums[j] > key:
                    nums[j + 1] = nums[j]
                else:
                    # nums[j] <= key: key already sits at j + 1, stop here.
                    break
                # Only reached after a shift: drop key into the freed slot.
                nums[j] = key
        return nums


# Demo: sort a random permutation of 0..9 and show before/after.
ls = permutation([i for i in range(10)])
for i in range(1):
    ls_sorted = Solution().insertSort(ls)
    print(f'original list:\t{ls}')
    print(f'sorted list:\t{ls_sorted}')
def sample_other(other, S, F, n_rep, n_fix):
    """Sample values of *S* at randomly chosen non-zero positions of *other*.

    The indices returned by ``np.nonzero(other)[0]`` form the candidate
    pool. For each of the *n_rep* repetitions the pool is shuffled
    independently and its first *n_fix* entries are kept, so entries within
    one repetition are distinct. The result is *S* indexed by an array with
    one column per repetition. *F* is accepted but not used.
    """
    fixated = np.nonzero(other)[0]
    # One row of candidate positions 0..len(fixated)-1 per repetition.
    candidate_rows = np.tile(range(len(fixated)), [n_rep, 1])
    picks = [random.permutation(row)[:n_fix] for row in candidate_rows]
    r = fixated[np.transpose(picks)]
    # Saliency map values at random locations
    # (including fixated locations!? underestimated)
    return S[r]