def slice_sample_all_components_optimized(rv_mdl, glob_mdl, data, prior, rows = None, cols = None , width = None):
    log = logging.getLogger("sampling")
    rv = rv_mdl.get()
    
    if isinstance(prior, collections.abc.Sequence):
        if len(prior) == 1:
            prior_list = [prior[0]] * len(rv.flat)
        elif len(prior) == len(rv.flat):
            prior_list = prior
        else:
            raise IndexError("Expected either one prior for all rv or one for each")
    else:
        prior_list = [prior] * len(rv.flat)
        
    cur_ll = glob_mdl.log_likelihood(data)
    
    if rows is None:
        rows = npr.permutation(rv.shape[0])
    if cols is None:
        cols = npr.permutation(rv.shape[1])
    
    for row in rows:
        for col in cols:
            log_likelihood = glob_mdl.llike_function(data, rv_mdl, (row, col))
            idx = np.ravel_multi_index((row, col), rv.shape)
            cur_ll =  slice_sample_component(rv.flat,
                                             idx,
                                             log_likelihood,
                                             prior_list[idx],
                                             cur_ll,
                                             width)
Example #2
def printFiltered(clustdict, seqdict, classdict, newfasta, newclass):

    outfasta = open(newfasta, "wb")
    outclass = open(newclass, "wb")

    # iterate over clusters
    for clist in clustdict.values():

        keepIDs = []
        # if cluster larger than 1, check labels 
        if len(clist) > 1:
            pclass1 = [p for p in clist if classdict[p] == "0"]    
            pclass2 = [p for p in clist if classdict[p] == "1"]
            # choose one protein from each class for cluster 
            if pclass1: 
                keepIDs.append(random.permutation(pclass1)[0])
            if pclass2:
                keepIDs.append(random.permutation(pclass2)[0])
        else:
            keepIDs = clist    
        
        # print to new FASTA/.class file
        for ID in keepIDs:
            outfasta.write(">%s\n%s\n" % (ID, seqdict[ID]))
            outclass.write("%s\t%s\n" % (ID, classdict[ID]))

    outfasta.close()
    outclass.close()
Example #3
def phaseshuffle(input_signal):
    """Shuffle the phases of a real signal's component frequencies among each other,
    preserving the phase of the DC component and of any Nyquist element.

    Input is a matrix of channels x timesteps.
    """

    ## Fourier Transform to Get Component Frequencies' Phases and Magnitudes
    length_of_signal = input_signal.shape[1]
    from numpy.fft import rfft, irfft
    from numpy import angle, zeros, concatenate, exp
    from numpy.random import permutation
    print("Calculating component frequencies and their phases")
    y = rfft(input_signal, axis=1)
    magnitudes = abs(y)
    phases = angle(y)

## Shuffle Phases, Preserving DC component and Nyquist element (if present)
    number_of_channels, N = y.shape
    randomized_phases = zeros(y.shape)

    print("Randomizing")
    for j in range(number_of_channels):

        if length_of_signal % 2:  # odd-length signal: the rfft output has no Nyquist bin
            # Retain the DC component and shuffle the remaining components.
            order = concatenate(([0], permutation(N - 1) + 1))
        else:
            # Retain the DC and Nyquist components and shuffle the remaining components.
            # This keeps the reconstructed signal real instead of complex.
            order = concatenate(([0], permutation(N - 2) + 1, [N - 1]))

        randomized_phases[j] = phases[j, order]

## Construct New Signal
    print("Constructing new signal")
    y1 = magnitudes*exp(1j*randomized_phases)
    output_signal = irfft(y1, n=length_of_signal, axis=1)  # irfft with n=length_of_signal returns a real signal of the original length
    return output_signal
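A minimal usage sketch (not part of the original snippet): assuming phaseshuffle and numpy are available in the current session, it builds a random multichannel signal and checks that phase shuffling preserves the amplitude spectrum.

import numpy as np

rng = np.random.default_rng(0)
signal = rng.standard_normal((3, 1000))   # 3 channels x 1000 timesteps
surrogate = phaseshuffle(signal)

# Only phases are scrambled; the amplitude spectrum should be unchanged.
print(np.allclose(np.abs(np.fft.rfft(signal, axis=1)),
                  np.abs(np.fft.rfft(surrogate, axis=1))))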
Example #4
def subsample(sample_num, labels = None, **kwargs):
    res = ()
    if labels is not None:  # return sample_num samples from each label
        num_tags = len(np.unique(labels))
        reslable = None
        sm = sample_num
        for i, argname in enumerate(kwargs):
            temp = None
            val = kwargs[argname]
            for j,tag in enumerate(np.unique(labels)):
                val = kwargs[argname]
                p = permutation(sm)
                if temp is None:
                    temp = val[labels == tag][p]
                else:
                    temp = np.concatenate((temp,val[labels == tag][p]), axis = 0)
            res = res + (temp,)
        for j, tag in enumerate(np.unique(labels)):
            p = permutation(sm)
            if reslable is None:
                reslable = labels[labels == tag][p]
            else:
                reslable = np.concatenate((reslable,labels[labels == tag][p]), axis = 0)
        return res + (reslable,) 
    else: #just sample some points
        sm = sample_num
        for i, argname in enumerate(kwargs):
            val = kwargs[argname]
            p = permutation(sm)
            res = res + (val[p],)   
        return res
Example #5
def hariri(top_stimuli, targets, distractors, forbid_identical_targets=True, suffix_characters=0):
    # generate balanced randomness for left/right target placement
    randomness = list(permutation(np.tile([[True], [False]], (len(top_stimuli) // 2, 1))))

    if forbid_identical_targets:
        if suffix_characters:
            while True:
                targets = list(permutation(targets))
                distractors = list(permutation(distractors))
                errors = 0
                for i in range(len(targets)):
                    if targets[i][:-suffix_characters] == top_stimuli[i][:-suffix_characters] \
                            or distractors[i][:-suffix_characters] == top_stimuli[i][:-suffix_characters] \
                            or targets[i][:-suffix_characters] == distractors[i][:-suffix_characters]:
                        errors += 1
                if errors == 0:
                    break
        else:
            # avoid collisions when the last stimulus in the top_stimuli and target lists is the same
            while targets[0] == top_stimuli[-1]:
                targets = list(permutation(targets))
    distractors = distractors[::-1]
    targets = targets[::-1]
    stimseq = pd.DataFrame(index=np.arange(len(top_stimuli)),
                           columns=['emotion', 'emotion intensity', 'scrambling', 'gender',
                                    'top face', 'left face', 'right face', 'correct answer'])
    stimseq['top face'] = top_stimuli
    for pos, top_stim in enumerate(stimseq['top face']):
        is_right = randomness.pop()
        if is_right:
            stimseq.loc[pos, 'left face'] = distractors.pop()
            stimseq.loc[pos, 'right face'] = targets.pop()
            stimseq.loc[pos, 'correct answer'] = 'right'
        else:
            stimseq.loc[pos, 'right face'] = distractors.pop()
            stimseq.loc[pos, 'left face'] = targets.pop()
            stimseq.loc[pos, 'correct answer'] = 'left'
    return stimseq
Example #6
File: isa_test.py Project: mhahn0106/cisa
	def test_sample_posterior(self):
		isa = ISA(2, 3, num_scales=10)
		isa.A = asarray([[1., 0., 1.], [0., 1., 1.]])

		isa.initialize()

		params = isa.default_parameters()
		params['gibbs']['verbosity'] = 0
		params['gibbs']['num_iter'] = 100

		states_post = isa.sample_posterior(isa.sample(1000), params)
		states_prio = isa.sample_prior(states_post.shape[1])

		states_post = states_post.flatten()
		states_post = states_post[permutation(states_post.size)]
		states_prio = states_prio.flatten()
		states_prio = states_prio[permutation(states_prio.size)]

		# on average, posterior samples should be distributed like prior samples
		p = ks_2samp(states_post, states_prio)[1]

		self.assertGreater(p, 0.0001)

		samples = isa.sample(100)
		states = isa.sample_posterior(samples, params)

		# reconstruction should be perfect
		self.assertLess(sum(square(dot(isa.A, states) - samples).flatten()), 1e-10)
Example #7
 def TwoSampleTest(self,sample1,sample2,numShuffles=1000,method='vanilla',blockSize=20):
     """
     Compute the p-value associated to the MMD between two samples
     method determines the null approximation procedure:
     ----'vanilla': standard permutation test
     ----'block': block permutation test
     ----'wild': wild bootstrap
     ----'wild-center': wild bootstrap with empirical degeneration
     """
     n1=shape(sample1)[0]
     n2=shape(sample2)[0]
     merged = concatenate( [sample1, sample2], axis=0 )
     merged_len=shape(merged)[0]
     numBlocks = merged_len // blockSize
     K=self.kernel(merged)
     mmd = mean(K[:n1,:n1])+mean(K[n1:,n1:])-2*mean(K[n1:,:n1])
     null_samples = zeros(numShuffles)
     
     if method=='vanilla':
         for i in range(numShuffles):
             pp = permutation(merged_len)
             Kpp = K[pp,:][:,pp]
             null_samples[i] = mean(Kpp[:n1,:n1])+mean(Kpp[n1:,n1:])-2*mean(Kpp[n1:,:n1])
             
     elif method=='block':
         blocks=reshape(arange(merged_len),(numBlocks,blockSize))
         for i in range(numShuffles):
             pb = permutation(numBlocks)
             pp = reshape(blocks[pb],(merged_len))
             Kpp = K[pp,:][:,pp]
             null_samples[i] = mean(Kpp[:n1,:n1])+mean(Kpp[n1:,n1:])-2*mean(Kpp[n1:,:n1])
             
     elif method=='wild' or method=='wild-center':
         if n1!=n2:
             raise ValueError("Wild bootstrap MMD available only on the same sample sizes")
         alpha = exp(-1/float(blockSize))
         coreK = K[:n1,:n1]+K[n1:,n1:]-K[n1:,:n1]-K[:n1,n1:]
         for i in range(numShuffles):
             """
             w is a draw from the Ornstein-Uhlenbeck process
             """
             w = HelperFunctions.generateOU(n=n1,alpha=alpha)
             if method=='wild-center':
                 """
                 empirical degeneration (V_{n,2} in Leucht & Neumann)
                 """
                 w = w - mean(w)
             null_samples[i]=mean(outer(w,w)*coreK)
     elif method=='wild2':
         
         alpha = exp(-1/float(blockSize))
         for i in range(numShuffles):
             wx=HelperFunctions.generateOU(n=n1,alpha=alpha)
             wx = wx - mean(wx)
             wy=HelperFunctions.generateOU(n=n2,alpha=alpha)
             wy = wy - mean(wy)
             null_samples[i]=mean(outer(wx,wx)*K[:n1,:n1])+mean(outer(wy,wy)*K[n1:,n1:])-2*mean(outer(wx,wy)*K[:n1,n1:])
     else:
         raise ValueError("Unknown null approximation method")
     return sum(mmd<null_samples)/float(numShuffles)
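For reference, a self-contained sketch of the 'vanilla' permutation null described in the docstring, using a Gaussian kernel; the kernel choice, bandwidth, and sample sizes are illustrative assumptions rather than part of the original class.

import numpy as np
from numpy.random import permutation

def gaussian_kernel(Z, sigma=1.0):
    sq = np.sum(Z ** 2, axis=1)
    d2 = sq[:, None] + sq[None, :] - 2 * Z @ Z.T
    return np.exp(-d2 / (2 * sigma ** 2))

def mmd_perm_pvalue(x, y, num_shuffles=1000, sigma=1.0):
    n1 = len(x)
    K = gaussian_kernel(np.concatenate([x, y], axis=0), sigma)
    stat = K[:n1, :n1].mean() + K[n1:, n1:].mean() - 2 * K[n1:, :n1].mean()
    null = np.empty(num_shuffles)
    for i in range(num_shuffles):
        pp = permutation(len(K))             # permute the pooled sample
        Kpp = K[pp][:, pp]
        null[i] = Kpp[:n1, :n1].mean() + Kpp[n1:, n1:].mean() - 2 * Kpp[n1:, :n1].mean()
    return np.mean(stat < null)

x = np.random.randn(50, 2)
y = np.random.randn(60, 2) + 0.25
print(mmd_perm_pvalue(x, y))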
Example #8
File: trainer.py Project: ageek/sandbox
def make_batches(data, labels=None, batch_size=100):
    if labels is not None:
        num_labels = labels.shape[1]
        cls_data = [data[find(labels[:,i] == 1)] for i in range(num_labels)]
        cls_sizes = [d.shape[0] for d in cls_data]
        cls_sels = [permutation(range(s)) for s in cls_sizes]
        n = min(cls_sizes) * len(cls_sizes)
        batch_size = min(n, batch_size)
        lpb = batch_size // num_labels  # labels per batch
        new_dat = []
        for i in range(n // batch_size):
            for sel, cd in zip(cls_sels, cls_data):
                new_dat.append(cd[sel[i*lpb:(i+1)*lpb]])
        if sparse.issparse(data):
            data = sparse.vstack(new_dat).tocsr()
        else:
            data = np.vstack(new_dat)
        labels = np.tile(np.repeat(np.eye(num_labels), lpb, 0), (n // batch_size, 1))
        n = len(labels)
        perm = range(n)
    else:
        n = data.shape[0]
        perm = permutation(range(n))
    i = 0
    while i < n:
        batch = perm[i:i+batch_size]
        i += batch_size
        yield (data[batch], None) if labels is None else (data[batch], labels[batch])
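A minimal usage sketch for the unlabeled path (assuming make_batches and numpy are in scope; the array shape and batch size are arbitrary):

import numpy as np

X = np.arange(20).reshape(10, 2)
for batch_x, batch_y in make_batches(X, labels=None, batch_size=4):
    print(batch_x.shape, batch_y)   # batch_y is None when no labels are given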
Example #9
    def train_svm(self, X, y, C, iter_max):
        """Train SVM using SMO algorithm

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector {-1, 1}
            C (float): penalty parameter
            iter_max (int): maximum number of iterations

        Returns:
            (numpy.array, list): (alpha_array, b), trained parameters
        """
        n, p = X.shape
        K = dot(X, X.T)
        alpha_array = zeros(n)
        b = [0]
        num_changed = 0
        examine_all = True

        while num_changed > 0 or examine_all:
            num_changed = 0

            if examine_all:
                alpha_index = permutation(range(n))
                for i2 in alpha_index:
                    if self.examine_example(i2, alpha_array, X, y, C, b, K):
                        num_changed += 1
                        self.loglist.append(
                            self.calc_objective_fast(alpha_array, y, K))
                        iter_max -= 1
                        if iter_max < 0:
                            break
                if iter_max < 0:
                    break
            else:
                alpha_index_nonbound = [i for i in range(n)
                                        if alpha_array[i] != 0 and alpha_array[i] != C]
                alpha_index_nonbound = permutation(alpha_index_nonbound)
                for i2 in alpha_index_nonbound:
                    if self.examine_example(i2, alpha_array, X, y, C, b, K):
                        num_changed += 1
                        self.loglist.append(
                            self.calc_objective_fast(alpha_array, y, K))
                        iter_max -= 1
                        if iter_max < 0:
                            break
                if iter_max < 0:
                    break

            # stop if the number of changed alphas is less than n / 10
            if num_changed < n / 10:
                break

            if examine_all:
                examine_all = False
            elif num_changed == 0:
                examine_all = True

        return (alpha_array, b)
Example #10
def DivideDataRandom(dataByClass, numDivs, numClasses, dividedData, dividedClasses):
    sampleSize = [len(dataByClass[i]) // numDivs for i in range(numClasses)]
    print(sampleSize)
    for j in range(numClasses):
        # permutation() returns a shuffled copy, so assign it back to shuffle the class data
        dataByClass[j] = random.permutation(dataByClass[j])

    for i in range(numDivs):
        for j in range(numClasses):
            for k in range(sampleSize[j]):
                dividedData[i].append(dataByClass[j][sampleSize[j] * i + k])
                dividedClasses[i].append(j)
Example #11
def random_classification_noise(labels, frac_flip, symm_flip):
    """Generate random classification noise by flipping a proportion
    of labels randomly.
    """
    if frac_flip > 0.0:
        num_ex = len(labels)
        if symm_flip:
            flip_idx = permutation(num_ex)[:round(frac_flip*num_ex)]
            labels[flip_idx] = -1.0*labels[flip_idx]
        else:
            # flip only the positively-labelled examples among a larger candidate set
            cand_idx = permutation(num_ex)[:round(2.0*frac_flip*num_ex)]
            pos_idx = cand_idx[labels[cand_idx] > 0.0]
            labels[pos_idx] = -1.0*labels[pos_idx]
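Minimal usage sketch (assuming the function above, numpy, and numpy.random.permutation are in scope; the label vector and flip fraction are arbitrary):

import numpy as np

labels = np.where(np.random.rand(20) > 0.5, 1.0, -1.0)
before = labels.copy()
random_classification_noise(labels, frac_flip=0.2, symm_flip=True)  # flips labels in place
print("flipped", int(np.sum(before != labels)), "of", len(labels), "labels")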
Example #12
File: util.py Project: samfway/biotm
def BalancedKFold(y, n_folds=3, n_iter=1, indices=None, shuffle=False, random_state=None):
    """ Return class-balanced cross validation folds """ 
    y = asarray(y)
    n_samples = y.shape[0]
    unique_labels, y_inv = unique(y, return_inverse=True)
    n_classes = len(unique_labels)
    label_counts = bincount(y_inv)
    min_labels = min(label_counts)

    test_per_fold = floor(min_labels/n_folds)
    total_test = test_per_fold * n_classes
    train_per_fold = test_per_fold * (n_folds-1)
    total_train = train_per_fold * n_classes

    if train_per_fold < 1:
        raise ValueError("The least populated class has too few samples (%d) to "
                         "use %d-fold cross validation!" % (min_labels, n_folds))

    # Perform regular, stratified cross validation, but subsample all class
    # labels to even depth
    folds = []
    for t in xrange(n_iter):
        for (training, testing) in StratifiedKFold(y_inv, n_folds):
            train = []
            test = [] 
            training = permutation(training)
            testing = permutation(testing)

            saved = 0
            counts = zeros(n_classes)
            for i in training:
                if counts[y_inv[i]] < train_per_fold:
                    train.append(i)
                    counts[y_inv[i]] += 1
                    saved += 1
                    if saved >= total_train:
                        break

            saved = 0
            counts = zeros(n_classes)
            for i in testing:
                if counts[y_inv[i]] < test_per_fold:
                    test.append(i)
                    counts[y_inv[i]] += 1
                    saved += 1
                    if saved >= total_test:
                        break

            folds.append((asarray(train), asarray(test)))

    return folds 
   
Example #13
    def _generate_x(self, shuffle_experiment_order=False, **kwargs):
        """ Transform data such that x_i contains all gene expressions 1,...,n for experiment i
        """
        self.x = self.data.copy()

        # create shuffled version to check importance of signal in actual data
        for e in self.x:
            self.x_shuffled.append(npr.permutation(e))

        if shuffle_experiment_order:
            ra = npr.permutation(list(range(len(self.x))))
            self.x = [self.x[i] for i in ra]
            self.x_shuffled = [self.x_shuffled[i] for i in ra]
Example #14
def drop_samples(game, prob):
    """Drop samples from a sample game

    Samples are dropped independently with probability prob."""
    sample_map = {}
    for prof, pays in zip(np.split(game.profiles, game.sample_starts[1:]),
                          game.sample_payoffs):
        num_profiles, _, num_samples = pays.shape
        perm = rand.permutation(num_profiles)
        prof = prof[perm]
        pays = pays[perm]
        new_samples, counts = np.unique(
            rand.binomial(num_samples, prob, num_profiles), return_counts=True)
        splits = counts[:-1].cumsum()
        for num, prof_samp, pay_samp in zip(
                new_samples, np.split(prof, splits), np.split(pays, splits)):
            if num == 0:
                continue
            prof, pays = sample_map.setdefault(num, ([], []))
            prof.append(prof_samp)
            pays.append(pay_samp[..., :num])

    if sample_map:
        profiles = np.concatenate(list(itertools.chain.from_iterable(
            x[0] for x in sample_map.values())), 0)
        sample_payoffs = tuple(np.concatenate(x[1]) for x
                               in sample_map.values())
    else:  # No data
        profiles = np.empty((0, game.num_role_strats), dtype=int)
        sample_payoffs = []

    return rsgame.samplegame_copy(game, profiles, sample_payoffs, False)
Example #15
 def __init__(self, data_dir, feature_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
     DataProvider.__init__(self, data_dir, feature_range, init_epoch, init_batchnum, dp_params, test)
     self.shuffle_data = dp_params['shuffle_data'] # determine whether to shuffle test data
     if 'external_meta_path' in dp_params and dp_params['external_meta_path']:
         import iread.myio as mio
         ext_meta = mio.unpickle(dp_params['external_meta_path'])
         print 'Loaded external_meta from %s successfully' % dp_params['external_meta_path']
         for item in ext_meta:
             self.batch_meta[item] = ext_meta[item]
             print '----Load %s from ext_meta successfully' % item
         del ext_meta
     self.test = test
     self.feature_range = np.asarray(feature_range)
     self.num_feature = len(feature_range)
     self.batch_size = dp_params['batch_size']
     self.keep_data_dic = False
     if self.batch_size > self.num_feature or self.batch_size <= 0:
         raise BasicDataProviderError('Invalid batch_size %d (num_image=%d)' % (self.batch_size, self.num_feature))
     self.num_batch = (self.num_feature - 1) // self.batch_size + 1
     self.batch_range = range(self.num_feature)
     if self.curr_batchnum not in self.batch_range:
         self.curr_batchnum = 0
     self.curr_batchnum = min(max(self.curr_batchnum, 0), self.num_feature - 1)
     self.batch_idx = self.curr_batchnum
     if test and self.shuffle_data == 0:
         # There is no need to shuffle testing data
         self.shuffled_feature_range = self.feature_range
     else:
         self.shuffled_feature_range = self.feature_range[rd.permutation(self.num_feature)]
     self.num_feature_type = len(self.batch_meta['feature_dim'])
     self.feature_dim = self.batch_meta['feature_dim']
Example #16
File: entropy.py Project: bumps/bumps
def kde_entropy_sklearn(points, n_est=None):
    """
    Use sklearn.neighbors.KernelDensity pdf to estimate entropy.

    Data is standardized before analysis.

    Sample points drawn from the kernel density estimate.

    Fails for bimodal and dirichlet, similar to statsmodels kde.
    """
    n, d = points.shape

    # Default to the full set
    if n_est is None:
        n_est = n

    # reduce size of draw to n_est
    if n_est >= n:
        x = points
    else:
        x = points[permutation(n)[:n_est]]
        n = n_est

    #logp = sklearn_log_density(points, evaluation_points=n_est)
    logp = sklearn_log_density(x, evaluation_points=x)
    H = -np.mean(logp)
    return H / LN2
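Usage sketch (assumes scikit-learn plus the module's own helpers, e.g. sklearn_log_density and LN2 from the bumps entropy module, are importable alongside this function); for a standard normal the estimate can be compared against the analytic entropy in bits.

import numpy as np

d = 3
points = np.random.randn(2000, d)             # samples from a d-dimensional standard normal
H_est = kde_entropy_sklearn(points, n_est=500)
H_true = 0.5 * d * np.log2(2 * np.pi * np.e)  # analytic differential entropy in bits
print(H_est, H_true)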
Example #17
def shuffle_data(train_data, train_target):
    # usage: shuf_train_data, shuf_train_target = shuffle_data(train_data_list, train_target_list)
    train_data = np.array(train_data)
    train_target = np.array(train_target)

    # apply the same random permutation to samples and targets
    perm = permutation(len(train_target))
    train_data = train_data[perm]
    train_target = train_target[perm]
    return train_data, train_target
Example #18
def initParameter(ini):
    '''
    This function is to generate a full ini file.
    The format of ini file: ('segment.ini')
        [file]
        datadir = ...
        savedir = ...
        [parameter]
        repeat = ...
    '''
    if not os.path.exists(ini.file.savedir):
        os.mkdir(ini.file.savedir)
    namelist = os.listdir(ini.file.datadir)
    #print namelist
    ini.file.names = namelist

    repeat_time = ini.parameter.repeat
    cnt = len(namelist)
    ini.parameter.sequence = []
    for i in range(repeat_time):
        ini.parameter.sequence += permutation(cnt).tolist()
    #print ini.parameter.sequence

    ini.parameter.current = 0
    ini.save()
    
    return namelist, repeat_time
Example #19
    def plot_predictions(self):
        data = self.get_next_batch(train=False)[2] # get a test batch
        num_classes = self.test_data_provider.get_num_classes()
        NUM_ROWS = 2
        NUM_COLS = 4
        NUM_IMGS = NUM_ROWS * NUM_COLS
        NUM_TOP_CLASSES = min(num_classes, 4) # show this many top labels

        label_names = self.test_data_provider.batch_meta['label_names']
        if self.only_errors:
            preds = n.zeros((data[0].shape[1], num_classes), dtype=n.single)
        else:
            preds = n.zeros((NUM_IMGS, num_classes), dtype=n.single)
            rand_idx = nr.randint(0, data[0].shape[1], NUM_IMGS)
            print rand_idx
            data[0] = n.require(data[0][:,rand_idx], requirements='C')
            data[1] = n.require(data[1][:,rand_idx], requirements='C')
        data += [preds]
        temp = data[0]
        print data
        print temp.ndim,temp.shape,temp.size
        # Run the model
        self.libmodel.startFeatureWriter(data, self.sotmax_idx)
        self.finish_batch()

        fig = pl.figure(3)
        fig.text(.4, .95, '%s test case predictions' % ('Mistaken' if self.only_errors else 'Random'))
        if self.only_errors:
            err_idx = nr.permutation(n.where(preds.argmax(axis=1) != data[1][0,:])[0])[:NUM_IMGS] # what the net got wrong
            data[0], data[1], preds = data[0][:,err_idx], data[1][:,err_idx], preds[err_idx,:]

        data[0] = self.test_data_provider.get_plottable_data(data[0])
        for r in xrange(NUM_ROWS):
            for c in xrange(NUM_COLS):
                img_idx = r * NUM_COLS + c
                if data[0].shape[0] <= img_idx:
                    break
                pl.subplot(NUM_ROWS*2, NUM_COLS, r * 2 * NUM_COLS + c + 1)
                pl.xticks([])
                pl.yticks([])
                try:
                    img = data[0][img_idx,:,:,:]
                except IndexError:
                    # maybe greyscale?
                    img = data[0][img_idx,:,:]
                pl.imshow(img, interpolation='nearest')
                true_label = int(data[1][0,img_idx])

                img_labels = sorted(zip(preds[img_idx,:], label_names), key=lambda x: x[0])[-NUM_TOP_CLASSES:]
                pl.subplot(NUM_ROWS*2, NUM_COLS, (r * 2 + 1) * NUM_COLS + c + 1, aspect='equal')

                ylocs = n.array(range(NUM_TOP_CLASSES)) + 0.5
                height = 0.5
                width = max(ylocs)
                pl.barh(ylocs, [l[0]*width for l in img_labels], height=height, \
                        color=['r' if l[1] == label_names[true_label] else 'b' for l in img_labels])
                pl.title(label_names[true_label])
                pl.yticks(ylocs + height/2, [l[1] for l in img_labels])
                pl.xticks([width/2.0, width], ['50%', ''])
                pl.ylim(0, ylocs[-1] + height*2)
Example #20
def read_and_normalize_and_shuffle_train_data(img_rows, img_cols,
                                              color_type=1):
    cache_path = os.path.join('cache', 'train_r_' + str(img_rows) +
                              '_c_' + str(img_cols) + '_t_' +
                              str(color_type) + '.dat')
    if not os.path.isfile(cache_path) or use_cache == 0:
        train_data, train_target, driver_id, unique_drivers = \
            load_train(img_rows, img_cols, color_type)
        cache_data((train_data, train_target, driver_id, unique_drivers),
                   cache_path)
    else:
        print('Restore train from cache!')
        (train_data, train_target, driver_id, unique_drivers) = \
            restore_data(cache_path)
    train_data = np.array(train_data, dtype=np.uint8)
    train_target = np.array(train_target, dtype=np.uint8)
    if color_type == 1:
        train_data = train_data.reshape(train_data.shape[0], color_type,
                                        img_rows, img_cols)
    else:
        train_data = train_data.transpose((0, 3, 1, 2))
    train_target = np_utils.to_categorical(train_target, 10)
    train_data = train_data.astype('float32')
    mean_pixel = [103.939, 116.779, 123.68]
    for c in range(3):
        train_data[:, c, :, :] = train_data[:, c, :, :] - mean_pixel[c]
    # train_data /= 255
    perm = permutation(len(train_target))
    train_data = train_data[perm]
    train_target = train_target[perm]
    print('Train shape:', train_data.shape)
    print(train_data.shape[0], 'train samples')
    return train_data, train_target, driver_id, unique_drivers
Example #21
File: crossval.py Project: ASAPPinc/Kayak
    def __init__(self, num_folds, inputs, targets=None, permute=True):
        
        if permute:
            # Make a copy of the data, with a random permutation.
            self.ordering = npr.permutation(inputs.shape[0])
            self.inputs   = inputs[self.ordering,...].copy()
            if targets is not None:
                self.targets = targets[self.ordering,...].copy()
            else:
                self.targets = None
        else:
            self.ordering = np.arange(inputs.shape[0], dtype=int)
            self.inputs   = inputs
            self.targets  = targets

        self.fold_idx  = 0
        self.num_folds = num_folds
        self.edges     = np.linspace(0, self.inputs.shape[0], self.num_folds+1).astype(int)
        self.indices   = []
        for ii in xrange(self.num_folds):
            self.indices.append( np.arange(self.edges[ii], self.edges[ii+1], dtype=int) )
        self.folds = []
        for ii in xrange(self.num_folds):
            self.folds.append(Fold(self,
                                   np.array(list(itertools.chain.from_iterable([self.indices[jj] for jj in range(0,ii)+range(ii+1,self.num_folds)])), dtype=int),
                                   np.array(self.indices[ii], dtype=int)))
Example #22
    def generate(self):

        def create_nodes():
            for i in xrange(self.size):
                self.node.append(Node(i))

        def get_edge_count(node):
            base = rand.random_integers(self.edges_low, self.edges_high)
            return min(base +  condition(self.edges_spike, self.spike_prob), self.size-1) - node.degree()

        def create_edge(from_node, to_node):
            self.edge.append(Edge(from_node, to_node))
            from_node.connect(to_node)
            to_node.connect(from_node)
            #print '%d => %d' % (_from, _to)

        # start here
        create_nodes()

        for _from in xrange(self.size):
        #    print 'from=%d' % _from
            from_node = self.node[_from]
            n = get_edge_count(from_node)
            if n > 0:
                connections = 0
                for _to in rand.permutation(self.size):
                #    print 'to=%d' % _to
                    to_node = self.node[_to]
                    if _to != _from and not from_node.is_connected(to_node):
                        create_edge(from_node, to_node)
                        connections += 1
                        if connections == n:
                            break
Example #23
def trte_split(X, Y, tr_frac):
    """Split the data in X/Y into training and testing portions."""
    if gp.is_garray(X):
        X = X.as_numpy_array()
    else:
        X = np.array(X)
    if gp.is_garray(Y):
        Y = Y.as_numpy_array()
    else:
        Y = np.array(Y)
    obs_count = X.shape[0]
    obs_dim = X.shape[1]
    tr_count = round(tr_frac * obs_count)
    te_count = obs_count - tr_count
    Xtr = np.zeros((tr_count, X.shape[1]))
    Ytr = np.zeros((tr_count, Y.shape[1]))
    Xte = np.zeros((te_count, X.shape[1]))
    Yte = np.zeros((te_count, Y.shape[1]))
    idx = npr.permutation(range(obs_count))
    # Basic manual iteration
    for i in range(obs_count):
        if (i < tr_count):
            Xtr[i,:] = X[idx[i],:]
            Ytr[i,:] = Y[idx[i],:]
        else:
            Xte[(i - tr_count),:] = X[idx[i],:]
            Yte[(i - tr_count),:] = Y[idx[i],:]
    return [gp.garray(Xtr), gp.garray(Ytr), gp.garray(Xte), gp.garray(Yte)]
Example #24
File: data.py Project: aerows/NLP1-Project
 def bootstrap(self):
     # TODO: Discuss this implementation: do we want a random permutation over the first 60% of the dataset,
     # TODO: or rather take random elements from the dataset, resulting in a 60% coverage on average?
     n = self.number_of_samples()
     last_index = int(np.floor(n * 0.6))
     indexes = rnd.permutation(n)[0:last_index]
     return self._data()[indexes], self._labels()[indexes]
Example #25
def read_and_normalize_train_data():
    train_data, train_target, train_id = load_train()

    print('Convert to numpy...')
    train_data = np.array(train_data, dtype=np.uint8)
    train_target = np.array(train_target, dtype=np.uint8)

    print('Reshape...')
    train_data = train_data.transpose((0, 3, 1, 2))

    print('Convert to float...')
    train_data = train_data.astype('float32')
    mean_pixel = [103.939, 116.779, 123.68]
    print('Subtract 0...')
    train_data[:, 0, :, :] -= mean_pixel[0]
    print('Subtract 1...')
    train_data[:, 1, :, :] -= mean_pixel[1]
    print('Subtract 2...')
    train_data[:, 2, :, :] -= mean_pixel[2]

    train_target = np_utils.to_categorical(train_target, 8)

    # Shuffle experiment START !!!
    perm = permutation(len(train_target))
    train_data = train_data[perm]
    train_target = train_target[perm]
    # Shuffle experiment END !!!

    print('Train shape:', train_data.shape)
    print(train_data.shape[0], 'train samples')
    return train_data, train_target, train_id
Example #26
def subset(L, cardinality):
    k, n = L.shape
    orderedIdx = arange(n)
    messedIdx = permutation(orderedIdx)
    rightIdx = messedIdx[:cardinality]
    rightData = L[:,rightIdx]
    return rightData
Example #27
    def resolve(self):

        t0 = time()
        self.time = 0
        while not self.balanced() and not self.force_end():
            for a_id in permutation(self.nb_ants):
                if self.ants[a_id].action(self.time):
                    continue
            self.time += 1
            if self.time % 100000 == 0:
                self.verbose(1, 'Time: %d' % self.time)

                self.verbose(1, 'Nb of uncontrolled sites: %d' %
                             self.uncontrolled)
                sizes = [str(a.area_size) for a in self.ants]
                self.verbose(1, 'Territories: %s' % ','.join(sizes))
                if self.uncontrolled > 0:
                    delta_t = time() - self.tmark
                    duration = format_duration(delta_t)
                    delta_site = self.prev_uncontrolled - self.uncontrolled
                    self.verbose(1, '%d sites taken in %s'
                                 % (delta_site, duration))
                    self.total_time += delta_t
                    speed = (self.nb_sites - self.uncontrolled) / \
                        self.total_time
                    expected_duration = self.uncontrolled / speed
                    self.verbose(1, '%s expected for complete control'
                                 % (format_duration(expected_duration)))

                    self.prev_uncontrolled = self.uncontrolled
                    self.tmark = time()

        self.verbose(1, 'Balloon partitioning done, took %s, %d iterations'
                     % (format_duration(time() - t0), self.time))
Example #28
def preprocess(data):
	"""
	Log-transforms, centers and symmetrically whitens data.

	@type  data: array_like
	@param data: data points stored in columns
	"""

	# log-transform
	data = log(data + 1.)

	# center
	data = data - mean(data, 1).reshape(-1, 1)

	# shuffle
	data = data[:, permutation(data.shape[1])]

	# find eigenvectors
	eigvals, eigvecs = eig(cov(data))

	# eliminate eigenvectors whose eigenvalues are zero
	eigvecs = eigvecs[:, eigvals > 0]
	eigvals = eigvals[eigvals > 0]

	# symmetric whitening matrix
	whitening_matrix = dot(eigvecs, dot(diag(1. / sqrt(eigvals)), eigvecs.T))

	# whiten data
	return dot(whitening_matrix, data)
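Quick check of the whitening step (a sketch assuming preprocess and its numpy imports -- log, mean, permutation, eig, cov, dot, diag, sqrt -- are in scope): after preprocessing, the sample covariance should be close to the identity.

import numpy as np

data = np.exp(np.random.randn(5, 2000))   # positive data, points stored in columns
white = preprocess(data)
print(np.allclose(np.cov(white), np.eye(white.shape[0]), atol=1e-8))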
Example #29
  def validation_data(self, folds):
    """
    Performs data splitting, classifier training and prediction for given #folds
    :param folds: number of folds
    :return: list of numpy.array pairs (prediction, expected)
    """
    df = self.data_frame
    response = []

    assert len(df) > folds

    perms = array_split(permutation(len(df)), folds)

    for i in range(folds):
      train_idxs = list(range(folds))
      train_idxs.pop(i)
      train = []
      for idx in train_idxs:
        train.append(perms[idx])

      train = concatenate(train)

      test_idx = perms[i]

      training = df.iloc[train]
      test_data = df.iloc[test_idx]

      y = self.__factorize(training)
      classifier = self.train(training[self.features], y)
      predictions = classifier.predict(test_data[self.features])

      expected = self.__factorize(test_data)
      response.append([predictions, expected])

    return response
Example #30
File: entropy.py Project: bumps/bumps
def kde_entropy_statsmodels(points, n_est=None):
    """
    Use statsmodels KDEMultivariate pdf to estimate entropy.

    Density evaluated at sample points.

    Slow and fails for bimodal, dirichlet; poor for high dimensional MVN.
    """
    from statsmodels.nonparametric.kernel_density import KDEMultivariate
    n, d = points.shape

    # Default to the full set
    if n_est is None:
        n_est = n

    # reduce size of draw to n_est
    if n_est >= n:
        x = points
    else:
        x = points[permutation(n)[:n_est]]
        n = n_est

    predictor = KDEMultivariate(data=x, var_type='c'*d)
    p = predictor.pdf()
    H = -np.mean(log(p))
    return H / LN2
Example #31
File: train.py Project: GaiYu0/hackathon
def main(args):
    # load and preprocess dataset
#   data = load_data(args)

    u = np.load('u.npy')
    v = np.load('v.npy')
    x = np.load('x.npy')
    y = np.load('y.npy')

    dat = np.ones_like(u)
    n = len(x)
    adj = sps.coo_matrix((dat, (u, v)), shape=[n, n]).maximum(sps.eye(n))
#   adj = sps.eye(n, n)

    data = type('', (), {})
    data.graph = dgl.graph_index.create_graph_index(adj, readonly=True, multigraph=False)
    data.features = x
    data.labels = y
    data.num_labels = len(np.unique(y))
    data.train_mask = np.zeros(n)
    data.val_mask = np.zeros(n)
    data.test_mask = np.zeros(n)
    p = npr.permutation(n)
    data.train_mask[p[:args.n_train]] = 1
    data.val_mask[p[args.n_train : args.n_train + args.n_val]] = 1
    data.test_mask[p[args.n_train + args.n_val:]] = 1

    if args.gpu >= 0:
        ctx = mx.gpu(args.gpu)
    else:
        ctx = mx.cpu()

    if args.self_loop and not args.dataset.startswith('reddit'):
        data.graph.add_edges_from([(i,i) for i in range(len(data.graph))])

    train_nid = mx.nd.array(np.nonzero(data.train_mask)[0]).astype(np.int64)
    test_nid = mx.nd.array(np.nonzero(data.test_mask)[0]).astype(np.int64)

    features = mx.nd.array(data.features)
    labels = mx.nd.array(data.labels)
    train_mask = mx.nd.array(data.train_mask)
    val_mask = mx.nd.array(data.val_mask)
    test_mask = mx.nd.array(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.sum().asscalar()
    n_val_samples = val_mask.sum().asscalar()
    n_test_samples = test_mask.sum().asscalar()

    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
              n_train_samples,
              n_val_samples,
              n_test_samples))

    # create GCN model
    g = DGLGraph(data.graph, readonly=True)
    g.ndata['features'] = features
    g.ndata['labels'] = labels

    if args.model == "mlp":
        mlp_train(ctx, args, n_classes, features, labels, train_mask, val_mask, test_mask)
    elif args.model == "gcn_ns":
        gcn_ns_train(g, ctx, args, n_classes, train_nid, test_nid, n_test_samples)
    elif args.model == "gcn_cv":
        gcn_cv_train(g, ctx, args, n_classes, train_nid, test_nid, n_test_samples, False)
    elif args.model == "graphsage_cv":
        graphsage_cv_train(g, ctx, args, n_classes, train_nid, test_nid, n_test_samples, False)
    else:
        print("unknown model. Please choose from gcn_ns, gcn_cv, graphsage_cv")
Example #32
def split_train_test(data, test_ratio):
    shuffled_indices = rnd.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]
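Minimal usage sketch (assuming pandas as pd, the rnd alias for numpy.random, and the function above are in scope):

import pandas as pd

df = pd.DataFrame({"x": range(10)})
train_set, test_set = split_train_test(df, test_ratio=0.2)
print(len(train_set), "train +", len(test_set), "test")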
Example #33
plt.ion()

air = MaterialTable.fromMaterial('air', 0.4, 1)
BK7 = MaterialTable.fromMaterial('BK7', 0.4, 1)
MgF2 = MaterialTable.fromMaterial('MgF2', 0.4, 1)
Ta2O5 = MaterialTable.fromMaterial('Ta2O5', 0.4, 1)
AlAs = MaterialTable.fromMaterial('AlAs', 0.4, 1)
SiO2 = MaterialTable.fromMaterial('SiO2', 0.4, 1)

default = [MgF2, Ta2O5, AlAs, SiO2]

permutation_stack = []

for i in range(100):
    permutation_stack += list(permutation(default))

wl = _np.arange(0.4, 1, 0.005)


def getThicknesses(l=0.5, layers=2):
    widths = []
    stack = [air] + permutation_stack[:layers] + [BK7]
    TM = TransferMatrix(stack, [1] * layers)
    IM = [TM.getInterfacialMatrix(i, l, 0, True) for i in range(-1, layers)]
    p0 = _np.mod(_np.angle(-IM[0][1, 0] / IM[0][1, 1]),
                 2 * _const.pi)  # phase from immediate reflection
    phase = _np.mod(
        _np.angle(1 - (IM[0][1, 0] / IM[0][1, 1])**2), 2 * _const.pi
    )  # Phase change of transmission through the first interface and back
    for i in range(layers):
Example #34
    def test_spatial_searchlight(self,
                                 lrn_sllrn_SL_partitioner,
                                 do_roi=False,
                                 results_backend='native'):
        """Tests both generic and ad-hoc searchlights (e.g. GNBSearchlight)
        Test of and adhoc searchlight anyways requires a ground-truth
        comparison to the generic version, so we are doing sweepargs here
        """
        lrn, sllrn, SL, partitioner = lrn_sllrn_SL_partitioner
        ## if results_backend == 'hdf5' and not common_variance:
        ##     # no need for full combination of all possible arguments here
        ##     return

        if __debug__ and 'ENFORCE_CA_ENABLED' in debug.active \
           and  isinstance(lrn, ChainMapper):
            raise SkipTest("Known to fail while trying to enable "
                           "training_stats for the ChainMapper (M1NN here)")

        # e.g. for M1NN we need plain kNN(1) for m1nnsl, but to imitate m1nn
        #      "learner" we must use a chainmapper atm
        if sllrn is None:
            sllrn = lrn
        ds = datasets['3dsmall'].copy()
        # Let's test multiclass here, so boost # of labels
        ds[6:18].T += 2
        ds.fa['voxel_indices'] = ds.fa.myspace

        # To assure that users do not run into incorrect operation due to overflows
        ds.samples += 5000
        ds.samples *= 1000
        ds.samples = ds.samples.astype(np.int16)

        # compute N-1 cross-validation for each sphere
        # YOH: unfortunately sample_clf_lin is not guaranteed
        #      to provide exactly the same results due to inherent
        #      iterative process.  Therefore lets use something quick
        #      and pure Python
        cv = CrossValidation(lrn, partitioner)

        skwargs = dict(
            radius=1,
            enable_ca=['roi_sizes', 'raw_results', 'roi_feature_ids'])

        if do_roi:
            # select some random set of features
            nroi = rnd.randint(1, ds.nfeatures)
            # and lets compute the full one as well once again so we have a reference
            # which will be excluded itself from comparisons but values will be compared
            # for selected roi_id
            sl_all = SL(sllrn, partitioner, **skwargs)
            result_all = sl_all(ds)
            # select random features
            roi_ids = rnd.permutation(range(ds.nfeatures))[:nroi]
            skwargs['center_ids'] = roi_ids
        else:
            nroi = ds.nfeatures
            roi_ids = np.arange(nroi)
            result_all = None

        if results_backend == 'hdf5':
            skip_if_no_external('h5py')

        sls = [
            sphere_searchlight(cv, results_backend=results_backend, **skwargs),
            #GNBSearchlight(gnb, NFoldPartitioner(cvtype=1))
            SL(sllrn, partitioner, indexsum='fancy', **skwargs)
        ]

        if externals.exists('scipy'):
            sls += [SL(sllrn, partitioner, indexsum='sparse', **skwargs)]

        # Test nproc just once
        if externals.exists('pprocess') and not self._tested_pprocess:
            sls += [sphere_searchlight(cv, nproc=2, **skwargs)]
            self._tested_pprocess = True

        # Provide the dataset and all those searchlights for testing
        #self._test_searchlights(ds, sls, roi_ids, result_all)
        #nroi = len(roi_ids)
        #do_roi = nroi != ds.nfeatures
        all_results = []
        for sl in sls:
            # run searchlight
            mvpa2.seed()  # reseed rng again for m1nnsl
            results = sl(ds)
            all_results.append(results)
            #print `sl`
            # check for correct number of spheres
            self.assertTrue(results.nfeatures == nroi)
            # and measures (one per xfold)
            if partitioner.cvtype == 1:
                self.assertTrue(len(results) == len(ds.UC))
            elif partitioner.cvtype == 0.5:
                # here we had 4 unique chunks, so 6 combinations
                # even though 20 max was specified for NFold
                self.assertTrue(len(results) == 6)
            else:
                raise RuntimeError("Unknown yet type of partitioner to check")
            # check for chance-level performance across all spheres
            # makes sense only if number of features was big enough
            # to get some stable estimate of mean
            if not do_roi or nroi > 20:
                # was for binary, somewhat labile with M1NN
                #self.assertTrue(0.4 < results.samples.mean() < 0.6)
                self.assertTrue(0.68 < results.samples.mean() < 0.82)

            mean_errors = results.samples.mean(axis=0)
            # that we do get different errors ;)
            self.assertTrue(len(np.unique(mean_errors)) > 3)

            # check reasonable sphere sizes
            self.assertTrue(len(sl.ca.roi_sizes) == nroi)
            self.assertTrue(len(sl.ca.roi_feature_ids) == nroi)
            for i, fids in enumerate(sl.ca.roi_feature_ids):
                self.assertTrue(len(fids) == sl.ca.roi_sizes[i])
            if do_roi:
                # for roi we should relax conditions a bit
                self.assertTrue(max(sl.ca.roi_sizes) <= 7)
                self.assertTrue(min(sl.ca.roi_sizes) >= 4)
            else:
                self.assertTrue(max(sl.ca.roi_sizes) == 7)
                self.assertTrue(min(sl.ca.roi_sizes) == 4)

            # check base-class state
            self.assertEqual(sl.ca.raw_results.nfeatures, nroi)

            # Test if we got results correctly for 'selected' roi ids
            if do_roi:
                assert_array_equal(result_all[:, roi_ids], results)

        if len(all_results) > 1:
            # if we had multiple searchlights, we can check either they all
            # gave the same result (they should have)
            aresults = np.array([a.samples for a in all_results])
            dresults = np.abs(aresults - aresults.mean(axis=0))
            dmax = np.max(dresults)
            self.assertTrue(dmax <= 1e-13)

        # Test the searchlight's reuse of neighbors
        for indexsum in ['fancy'] + (externals.exists('scipy') and ['sparse']
                                     or []):
            sl = SL(sllrn,
                    partitioner,
                    indexsum=indexsum,
                    reuse_neighbors=True,
                    **skwargs)
            mvpa2.seed()
            result1 = sl(ds)
            mvpa2.seed()
            result2 = sl(ds)  # must be faster
            assert_array_equal(result1, result2)
Example #35
"""
numpy随机模块的排列运算
    shuffle(x)
        |- 对x的数据进行直接洗牌。
    permutation(x)
        |- 利用x的数据洗牌产生新的拷贝数据。
"""

import numpy as np
import numpy.random as rd

a = np.arange(0, 10)
r = rd.shuffle(a)  # returns None; a is shuffled in place
print('shuffle:', a, r)

a = np.arange(0, 10)
r = rd.permutation(a)  # returns a shuffled copy; a is unchanged
print('permutation:', a, r)
Example #36
def shuffle(A, d=5):
    n = A.shape[0]
    r = randint(1, d + 1, (n, 1))
    A += permutation(A) * r
    return A
Example #37
    def runMousefunc(self, nFold=3, nTree=250, criterion="gini", density=0.1):
        """
			CV: -1 => total model (no cv)
			CV: nFold => mean metric over cv
		"""
        self.__database.createGOIDView(self.__goidtable,
                                       double=["AUROC", "AUPR", "Fmax"],
                                       drop=True)
        self.__database.createProteinView(self.__proteintable, double=["ProteinID", "Label", "Score"], \
             drop=True)

        # Get labels
        test = 0
        pp = permutation(self.__numproteins)
        resultid = 0
        for goid in self.__goid:
            print "____________ GOID %d ____________" % goid
            # Get label for GOID
            goidindex = where(self.__goid == goid)
            goidindex = int(goidindex[0])
            print goidindex
            annotations = self.selectAnnotatedProteinsMousefunc(goidindex)

            print "0s=", len([x for x in annotations if x == 0])
            print "1s=", len([x for x in annotations if x == 1])
            print "-1s=", len([x for x in annotations if x == -1])

            annotation = []
            for value in annotations:
                annotation.append(value)

            annotation = asarray(annotation).astype(float64)
            annotation = annotation.ravel()
            print annotation.shape

            #model = RandomForestClassifier(n_estimators=100, n_jobs=-1)
            model = RandomForestClassifier(n_estimators=nTree, criterion=criterion, compute_importances=True,\
                min_density=density, bootstrap=True, max_features="auto", \
                n_jobs=8, verbose=1)
            model.fit(self.__network, annotation)

            scores = model.predict_proba(self.__network)
            per = Performance(annotations, scores[:, 0])
            roc = per.AUROCGillis()
            print "AUROC= ", roc
            pr = per.AUPRGillis()
            print "AUPR= ", pr
            fmax = per.Fmax()
            print "Fmax= ", fmax

            self.__database.insertProteinView(self.__proteintable, resultid, goid[0], -1, self.__proteins, \
                 annotations, scores[:,0])
            self.__database.insertGOIDView(self.__goidtable, resultid, goid[0],
                                           -1, [roc, pr, fmax])
            resultid += 1

            del per
            #break

            labelIx = range(self.__numproteins)
            offset = 0
            fold = 0
            meanroc = []
            meanpr = []
            meanfmax = []

            while fold < nFold:
                print "____________ Fold= %d ____________" % fold
                lastelem = min(self.__numproteins,
                               offset + floor(self.__numproteins / nFold))
                ix = []
                for index in pp[offset + 1:lastelem]:
                    ix.append(labelIx[index])

                offset = lastelem

                labeltmp = []
                for value in annotations:
                    labeltmp.append(float(value))

                for index in ix:
                    labeltmp[index] = 0

                labeltmp = asarray(labeltmp).astype(float64)
                labeltmp = labeltmp.ravel()

                print "0s=", len([x for x in labeltmp if x == 0])
                print "1s=", len([x for x in labeltmp if x == 1])
                print "-1s=", len([x for x in labeltmp if x == -1])

                model = RandomForestClassifier(n_estimators=nTree, criterion=criterion, compute_importances=True,\
                                                                            min_density=density, bootstrap=True, max_features="auto", \
                                                                            n_jobs=8, verbose=1)
                model.fit(self.__network, labeltmp)
                scores = model.predict_proba(self.__network)

                scores = scores[:, 0]

                score = []
                annotation = []
                proteins = []
                for index in ix:
                    score.append(float(scores[index]))
                    annotation.append(annotations[index])
                    proteins.append(self.__proteins[index])

                per = Performance(annotation, score)
                roc = per.AUROCGillis()
                print "AUROC= ", roc
                meanroc.append(roc)
                pr = per.AUPRGillis()
                print "AUPR= ", pr
                meanpr.append(pr)
                fmax = per.Fmax()
                print "Fmax= ", fmax
                meanfmax.append(fmax)

                self.__database.insertGOIDView(self.__goidtable, resultid,
                                               goid[0], fold, [roc, pr, fmax])
                self.__database.insertProteinView(self.__proteintable, resultid, goid[0], fold, \
                     proteins, annotation, score)

                del annotation
                del score
                del per
                fold += 1
                resultid += 1

            roc_mean = reduce(lambda x, y: x + y / float(len(meanroc)),
                              meanroc, 0)
            print "Mean AUROC= ", roc_mean
            pr_mean = reduce(lambda x, y: x + y / float(len(meanpr)), meanpr,
                             0)
            print "Mean AUPR= ", pr_mean
            fmax_mean = reduce(lambda x, y: x + y / float(len(meanfmax)),
                               meanfmax, 0)
            print "Mean Fmax= ", fmax_mean

            self.__database.insertGOIDView(self.__goidtable, resultid, goid[0], nFold, \
                [roc_mean, pr_mean, fmax_mean])
            resultid += 1

            test += 1
Example #38
        i += len(classes)
    epoch_loss = running_loss / size
    epoch_acc = running_corrects.data.item() / size
    print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
    return predictions, all_proba, all_classes


predictions, all_proba, all_classes = test_model2(model_vgg.classifier,
                                                  size=dset_sizes['valid'])

# Number of images to view for each visualization task
n_view = 8
correct = np.where(predictions == all_classes)[0]

from numpy.random import random, permutation
idx = permutation(correct)[:n_view]

loader_correct = torch.utils.data.DataLoader([dsets['valid'][x] for x in idx],
                                             batch_size=n_view,
                                             shuffle=True)
for data in loader_correct:
    inputs_cor, labels_cor = data

# Make a grid from batch
out = torchvision.utils.make_grid(inputs_cor)
imshow(out, title=[l.item() for l in labels_cor])

from IPython.display import Image, display
for x in idx:
    display(Image(filename=dsets['valid'].imgs[x][0], retina=True))
Example #39
def tran_kfolds(
    df,
    k=None,
    ft=None,
    out=None,
    var_fold=None,
    suffix="_mean",
    summaries=None,
    tf=tf_summarize,
    shuffle=True,
    seed=None,
):
    r"""Perform k-fold CV

    Perform k-fold cross-validation (CV) using a given fitting procedure (ft). Optionally provide a fold identifier column, or (randomly) assign folds.

    Args:
        df (DataFrame): Data to pass to given fitting procedure
        ft (gr.ft_): Partially-evaluated grama fit function; defines model fitting
            procedure and outputs to aggregate
        tf (gr.tf_): Partially-evaluated grama transform function; evaluation of
            fitted model will be passed to tf and provided with keyword arguments
            from summaries
        out (list or None): Outputs for which to compute `summaries`; None uses ft.out
        var_fold (str or None): Column to treat as fold identifier; overrides `k`
        suffix (str): Suffix for predicted value; used to distinguish between predicted and actual
        summaries (dict of functions): Summary functions to pass to tf; will be evaluated
            for outputs of ft. Each summary must have signature summary(f_pred, f_meas).
            Grama includes builtin options: gr.mse, gr.rmse, gr.rel_mse, gr.rsq, gr.ndme
        k (int): Number of folds; k=5 to k=10 recommended [1]
        shuffle (bool): Shuffle the data before CV? True recommended [1]

    Notes:
        - Many grama functions support *partial evaluation*; this allows one to specify things like hyperparameters in fitting functions without providing data and executing the fit. You can take advantage of this functionality to easily do hyperparameter studies.

    Returns:
        DataFrame: Aggregated results within each of k-folds using given model and
            summary transform

    References:
        [1] James, Witten, Hastie, and Tibshirani, "An introduction to statistical learning" (2017), Chapter 5. Resampling Methods

    Examples::

        import grama as gr
        from grama.data import df_stang
        from grama.fit import ft_rf
        df_kfolds = (
            df_stang
            >> gr.tf_kfolds(
                k=5,
                ft=ft_rf(out=["thick"], var=["E", "mu"]),
            )
        )

    """
    ## Check invariants
    if ft is None:
        raise ValueError("Must provide ft keyword argument")
    if (k is None) and (var_fold is None):
        print("... tran_kfolds is using default k=5")
        k = 5
    if summaries is None:
        print("... tran_kfolds is using default summaries mse and rsq")
        summaries = dict(mse=mse, rsq=rsq)

    n = df.shape[0]
    ## Handle custom folds
    if not (var_fold is None):
        ## Check for a valid var_fold
        if not (var_fold in df.columns):
            raise ValueError("var_fold must be in df.columns or None")
        ## Build folds
        levels = unique(df[var_fold])
        k = len(levels)
        print("... tran_kfolds found {} levels via var_folds".format(k))
        Is = []
        for l in levels:
            Is.append(list(arange(n)[df[var_fold] == l]))

    else:
        ## Shuffle data indices
        if shuffle:
            if seed:
                set_seed(seed)
            I = permutation(n)
        else:
            I = arange(n)
        ## Build folds
        di = int(ceil(n / k))
        Is = [I[i * di : min((i + 1) * di, n)] for i in range(k)]

    ## Iterate over folds
    df_res = DataFrame()
    for i in range(k):
        ## Train by out-of-fold data
        md_fit = df >> tf_filter(~var_in(X.index, Is[i])) >> ft

        ## Determine predicted and actual
        if out is None:
            out = str_replace(md_fit.out, suffix, "")
        else:
            out = str_replace(out, suffix, "")

        ## Test by in-fold data
        df_pred = md_fit >> ev_df(
            df=df >> tf_filter(var_in(X.index, Is[i])), append=False
        )

        ## Specialize summaries for output names
        summaries_all = ChainMap(
            *[
                {
                    key + "_" + o: fun(X[o + suffix], X[o])
                    for key, fun in summaries.items()
                }
                for o in out
            ]
        )

        ## Aggregate
        df_summary_tmp = (
            df_pred
            >> tf_bind_cols(df[out] >> tf_filter(var_in(X.index, Is[i])))
            >> tf(**summaries_all)
            # >> tf_mutate(_kfold=i)
        )

        if var_fold is None:
            df_summary_tmp = df_summary_tmp >> tf_mutate(_kfold=i)
        else:
            df_summary_tmp[var_fold] = levels[i]

        df_res = concat((df_res, df_summary_tmp), axis=0).reset_index(drop=True)

    return df_res
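
The fold construction above is just index arithmetic on a permutation. Below is a minimal standalone sketch of that logic using only numpy; make_folds is an illustrative name and not part of grama.

import numpy as np
from math import ceil
from numpy.random import permutation

def make_folds(n, k=5, shuffle=True):
    """Split indices 0..n-1 into k roughly equal folds."""
    idx = permutation(n) if shuffle else np.arange(n)
    size = int(ceil(n / k))
    return [idx[i * size: min((i + 1) * size, n)] for i in range(k)]

folds = make_folds(17, k=5)
assert set(np.concatenate(folds).tolist()) == set(range(17))  # all indices 0..16 are covered
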
Example #40
File: learn.py  Project: gauenk/cl_gen
def write_input_output(cfg, model, burst, aligned, denoised, filters, motion):
    """
    :params burst: input images to the model, :shape [B, N, C, H, W]
    :params aligned: output images from the alignment layers, :shape [B, N, C, H, W]
    :params denoised: output images from the denoiser, :shape [B, N, C, H, W]
    :params filters: filters used by the model, :shape [B, L, N, K2, 1, Hf, Wf], where L is the number of cascaded filter stages and Hf = H or 1
    :params motion: per-burst motion estimates used to draw the arrow image
    """

    # -- file path --
    path = Path(f"./output/n2sim/io_examples/{cfg.exp_name}/")
    if not path.exists(): path.mkdir(parents=True)

    # -- init --
    B, N, C, H, W = burst.shape

    # -- save histogram of residuals --
    denoised_np = denoised.detach().cpu().numpy()
    plot_histogram_residuals_batch(denoised_np,
                                   cfg.global_step,
                                   path,
                                   rand_name=False)

    # -- save histogram of gradients (denoiser) --
    if not model.use_unet_only:
        denoiser = model.denoiser_info.model
        plot_histogram_gradients(denoiser,
                                 "denoiser",
                                 cfg.global_step,
                                 path,
                                 rand_name=False)

    # -- save histogram of gradients (alignment) --
    if model.use_alignment:
        alignment = model.align_info.model
        plot_histogram_gradients(alignment,
                                 "alignment",
                                 cfg.global_step,
                                 path,
                                 rand_name=False)

    # -- save gradient norm by layer (denoiser) --
    if not model.use_unet_only:
        denoiser = model.denoiser_info.model
        plot_histogram_gradient_norms(denoiser,
                                      "denoiser",
                                      cfg.global_step,
                                      path,
                                      rand_name=False)

    # -- save gradient norm by layer (alignment) --
    if model.use_alignment:
        alignment = model.align_info.model
        plot_histogram_gradient_norms(alignment,
                                      "alignment",
                                      cfg.global_step,
                                      path,
                                      rand_name=False)

    if B > 4: B = 4
    for b in range(B):

        # -- save dirty & clean & res triplet --
        fn = path / Path(f"image_{cfg.global_step}_{b}.png")
        res = burst[b][N // 2] - denoised[b].mean(0)
        imgs = torch.stack([burst[b][N // 2], denoised[b].mean(0), res], dim=0)
        tv_utils.save_image(imgs,
                            fn,
                            nrow=3,
                            normalize=True,
                            range=(-0.5, 0.5))

        # -- save images --
        fn = path / Path(f"{cfg.global_step}_{b}.png")
        burst_b = torch.cat([
            burst[b][[N // 2]] - burst[b][[0]], burst[b],
            burst[b][[N // 2]] - burst[b][[-1]]
        ],
                            dim=0)
        aligned_b = torch.cat([
            aligned[b][[N // 2]] - aligned[b][[0]], aligned[b],
            aligned[b][[N // 2]] - aligned[b][[-1]]
        ],
                              dim=0)
        denoised_b = torch.cat([
            denoised[b][[N // 2]] - denoised[b][[0]], denoised[b],
            denoised[b][[N // 2]] - denoised[b][[-1]]
        ],
                               dim=0)
        imgs = torch.cat([burst_b, aligned_b, denoised_b], dim=0)  # 2N,C,H,W
        tv_utils.save_image(imgs,
                            fn,
                            nrow=N + 2,
                            normalize=True,
                            range=(-0.5, 0.5))

        # -- save filters --
        fn = path / Path(f"filters_{cfg.global_step}_{b}.png")
        K = int(np.sqrt(filters.shape[3]))
        L = filters.shape[1]
        if filters.shape[-1] > 1:
            S = npr.permutation(filters.shape[-1])[:10]
            filters_b = filters[b, ..., 0, S, S].view(N * 10 * L, 1, K, K)
        else:
            filters_b = filters[b, ..., 0, 0, 0].view(N * L, 1, K, K)
        tv_utils.save_image(filters_b, fn, nrow=N, normalize=True)

        # -- save direction image --
        fn = path / Path(f"arrows_{cfg.global_step}_{b}.png")
        if len(motion[b]) > 1 and len(motion[b].shape) > 1:
            arrows = create_arrow_image(motion[b], pad=2)
            tv_utils.save_image([arrows], fn)

    print(f"Wrote example images to file at [{path}]")
    plt.close("all")
Example #41
def permute_for_monte_carlo(dist_matrix):
    """Returns permuted copy of distance matrix for Monte Carlo tests."""
    size = len(dist_matrix)
    p = permutation(size)
    return dist_matrix[p][:, p]
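
A quick illustrative check: because the same permutation indexes both rows and columns, the relabelled matrix contains exactly the same distances as the original.

import numpy as np

D = np.array([[0., 1., 2.],
              [1., 0., 3.],
              [2., 3., 0.]])
Dp = permute_for_monte_carlo(D)
assert np.allclose(np.sort(Dp, axis=None), np.sort(D, axis=None))
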
Example #42
def main():
    # Dataset.
    ts_, ts_ext_, ts_vis_, ts, ts_ext, ts_vis, ys, ys_ = make_data()

    # Plotting parameters.
    vis_batch_size = 1024
    ylims = (-1.75, 1.75)
    alphas = [0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50, 0.55]
    percentiles = [0.999, 0.99, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]
    vis_idx = npr.permutation(vis_batch_size)
    # From https://colorbrewer2.org/.
    if args.color == "blue":
        sample_colors = ('#8c96c6', '#8c6bb1', '#810f7c')
        fill_color = '#9ebcda'
        mean_color = '#4d004b'
        num_samples = len(sample_colors)
    else:
        sample_colors = ('#fc4e2a', '#e31a1c', '#bd0026')
        fill_color = '#fd8d3c'
        mean_color = '#800026'
        num_samples = len(sample_colors)

    # Fix seed for the random draws used in the plots.
    eps = torch.randn(vis_batch_size, 1).to(device)
    bm = BrownianPath(t0=ts_vis[0],
                      w0=torch.zeros(vis_batch_size, 1).to(device))

    # Model.
    model = LatentSDE().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=.999)
    kl_scheduler = utils.LinearScheduler(iters=args.kl_anneal_iters)

    logp_metric = utils.EMAMetric()
    log_ratio_metric = utils.EMAMetric()
    loss_metric = utils.EMAMetric()

    if args.show_prior:
        with torch.no_grad():
            zs = model.sample_p(ts=ts_vis,
                                batch_size=vis_batch_size,
                                eps=eps,
                                bm=bm).squeeze()
            ts_vis_, zs_ = ts_vis.cpu().numpy(), zs.cpu().numpy()
            zs_ = np.sort(zs_, axis=1)

            img_dir = os.path.join(args.train_dir, 'prior.png')
            plt.subplot(frameon=False)
            for alpha, percentile in zip(alphas, percentiles):
                idx = int((1 - percentile) / 2. * vis_batch_size)
                zs_bot_ = zs_[:, idx]
                zs_top_ = zs_[:, -idx]
                plt.fill_between(ts_vis_,
                                 zs_bot_,
                                 zs_top_,
                                 alpha=alpha,
                                 color=fill_color)

            # `zorder` determines who's on top; the larger the more at the top.
            plt.scatter(ts_, ys_, marker='x', zorder=3, color='k',
                        s=35)  # Data.
            plt.ylim(ylims)
            plt.xlabel('$t$')
            plt.ylabel('$Y_t$')
            plt.tight_layout()
            plt.savefig(img_dir, dpi=args.dpi)
            plt.close()
            logging.info(f'Saved prior figure at: {img_dir}')

    for global_step in tqdm.tqdm(range(args.train_iters)):
        # Plot and save.
        if global_step % args.pause_iters == 0:
            img_path = os.path.join(args.train_dir,
                                    f'global_step_{global_step}.png')

            with torch.no_grad():
                zs = model.sample_q(ts=ts_vis,
                                    batch_size=vis_batch_size,
                                    eps=eps,
                                    bm=bm).squeeze()
                samples = zs[:, vis_idx]
                ts_vis_, zs_, samples_ = ts_vis.cpu().numpy(), zs.cpu().numpy(
                ), samples.cpu().numpy()
                zs_ = np.sort(zs_, axis=1)
                plt.subplot(frameon=False)

                if args.show_percentiles:
                    for alpha, percentile in zip(alphas, percentiles):
                        idx = int((1 - percentile) / 2. * vis_batch_size)
                        zs_bot_, zs_top_ = zs_[:, idx], zs_[:, -idx]
                        plt.fill_between(ts_vis_,
                                         zs_bot_,
                                         zs_top_,
                                         alpha=alpha,
                                         color=fill_color)

                if args.show_mean:
                    plt.plot(ts_vis_, zs_.mean(axis=1), color=mean_color)

                if args.show_samples:
                    for j in range(num_samples):
                        plt.plot(ts_vis_,
                                 samples_[:, j],
                                 color=sample_colors[j],
                                 linewidth=1.0)

                if args.show_arrows:
                    num, dt = 12, 0.12
                    t, y = torch.meshgrid([
                        torch.linspace(0.2, 1.8, num).to(device),
                        torch.linspace(-1.5, 1.5, num).to(device)
                    ])
                    t, y = t.reshape(-1, 1), y.reshape(-1, 1)
                    fty = model.f(t=t, y=y).reshape(num, num)
                    dt = torch.zeros(num, num).fill_(dt).to(device)
                    dy = fty * dt
                    dt_, dy_, t_, y_ = dt.cpu().numpy(), dy.cpu().numpy(
                    ), t.cpu().numpy(), y.cpu().numpy()
                    plt.quiver(t_,
                               y_,
                               dt_,
                               dy_,
                               alpha=0.3,
                               edgecolors='k',
                               width=0.0035,
                               scale=50)

                if args.hide_ticks:
                    plt.xticks([], [])
                    plt.yticks([], [])

                plt.scatter(ts_, ys_, marker='x', zorder=3, color='k',
                            s=35)  # Data.
                plt.ylim(ylims)
                plt.xlabel('$t$')
                plt.ylabel('$Y_t$')
                plt.tight_layout()
                plt.savefig(img_path, dpi=args.dpi)
                plt.close()
                logging.info(f'Saved figure at: {img_path}')

                if args.save_ckpt:
                    torch.save({'model': model.state_dict()},
                               os.path.join(ckpt_dir,
                                            f'global_step_{global_step}.ckpt'))

        # Train.
        optimizer.zero_grad()
        zs, log_ratio = model(ts=ts_ext, batch_size=args.batch_size)
        zs = zs.squeeze()
        zs = zs[
            1:
            -1]  # Drop first and last which are only used to penalize out-of-data region and spread uncertainty.

        likelihood = {
            "laplace": Laplace(loc=zs, scale=args.scale),
            "normal": Normal(loc=zs, scale=args.scale)
        }[args.likelihood]
        logp = likelihood.log_prob(ys).sum(dim=0).mean(dim=0)

        loss = -logp + log_ratio * kl_scheduler()
        loss.backward()
        optimizer.step()
        scheduler.step()
        kl_scheduler.step()

        logp_metric.step(logp)
        log_ratio_metric.step(log_ratio)
        loss_metric.step(loss)

        logging.info(
            f'global_step: {global_step}, '
            f'logp: {logp_metric.val():.3f}, log_ratio: {log_ratio_metric.val():.3f}, loss: {loss_metric.val():.3f}'
        )
Example #43
def generate_sr_instance(players):
    players = set(players)
    preferences = {p: list(permutation(list(players - {p}))) for p in players}
    return preferences
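
A hypothetical usage sketch: every player receives a uniformly shuffled preference list over the other players (a random stable-roommates instance, going by the function name).

prefs = generate_sr_instance(["a", "b", "c", "d"])
for p, ranking in prefs.items():
    assert p not in ranking and len(ranking) == 3
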
lebron_normalized = nba_normalized[nba["player"] == "LeBron James"]

# Find the distance between Lebron James and everyone else.
euclidean_distances = nba_normalized.apply(lambda row: distance.euclidean(row, lebron_normalized), axis=1)
distance_frame = pandas.DataFrame(data={"dist": euclidean_distances, "idx": euclidean_distances.index})
distance_frame.sort_values("dist", inplace=True)
second_smallest = distance_frame.iloc[1]["idx"]
most_similar_to_lebron = nba.loc[int(second_smallest)]["player"]

## 6. Generating Training and Testing Sets ##

import math
import random
from numpy.random import permutation

# Randomly shuffle the index of nba
random_indices = permutation(nba.index)
# Set a cutoff for how many items we want in the test set (in this case 1/3 of the items)
test_cutoff = math.floor(len(nba)/3)
# Generate the test set by taking the first 1/3 of the randomly shuffled indices
test = nba.loc[random_indices[:test_cutoff]]
# Generate the train set with the rest of the data
train = nba.loc[random_indices[test_cutoff:]]
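
For reference, scikit-learn's train_test_split performs the same shuffled split in one call (a hypothetical alternative, not part of the original exercise):

from sklearn.model_selection import train_test_split
train, test = train_test_split(nba, test_size=1/3, random_state=0)
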

## 7. Using sklearn ##

# The columns that we'll be using to make predictions
x_columns = ['age', 'g', 'gs', 'mp', 'fg', 'fga', 'fg.', 'x3p', 'x3pa', 'x3p.', 'x2p', 'x2pa', 'x2p.', 'efg.', 'ft', 'fta', 'ft.', 'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf']
# The column we want to predict
y_column = ["pts"]

from sklearn.neighbors import KNeighborsRegressor
Example #45
def preserve_strength(g,
                      randomize_topology=False,
                      preserve_mode='estimate_both',
                      permute_strength=True,
                      randomize_method='vl'):
    from numpy import array, ndarray
    from scipy.sparse import csc
    if (type(g) == ndarray or type(g) == csc.csc_matrix) and preserve_mode in [
            'estimate_both', 3
    ] and not randomize_topology:
        A = g
        from numpy import sum
        out_strength = sum(A, axis=1)
        in_strength = sum(A, axis=0)
        adj = 1 * (A > 0)
        out_degree = sum(adj, axis=1)
        in_degree = sum(adj, axis=0)

        if permute_strength:
            from numpy.random import permutation
            from numpy import unique, where
            out_in_sequence = array(list(zip(out_degree, in_degree)))
            for k in unique(out_in_sequence):
                k_ind = where((out_in_sequence == k).all(axis=1))[0]
                new_ind = permutation(k_ind)
                out_strength[k_ind] = out_strength[new_ind]
                in_strength[k_ind] = in_strength[new_ind]

        from numpy import mean, outer, logical_not
        mean_k = mean([out_degree, in_degree])
        mean_s = mean([out_strength, in_strength])
        G = (mean_k / mean_s) * outer(out_strength, in_strength) / outer(
            out_degree, in_degree)
        G[logical_not(adj)] = 0
        return G

    out_degree = g.degree(mode=1)
    in_degree = g.degree(mode=2)

    if randomize_topology:
        if g.is_directed():
            #Check if all edges are bidirectional.
            #If so, create a random graph with only bidirectional edges.
            G = g.copy()
            G.to_undirected(mode=False)
            if all(array(G.count_multiple()) == 2):
                G = g.Degree_Sequence(out_degree, method=randomize_method)
                G.to_directed()
            else:
                G = g.copy()
                G.rewire()
        else:
            G = g.Degree_Sequence(out_degree, method=randomize_method)
    else:
        G = g.copy()

    if preserve_mode in ['estimate_both', 3]:

        out_strength = array(g.strength(mode=1, weights='weight'))
        in_strength = array(g.strength(mode=2, weights='weight'))

        if permute_strength:
            from numpy.random import permutation
            from numpy import unique, where
            out_in_sequence = array(list(zip(out_degree, in_degree)))
            for k in unique(out_in_sequence):
                k_ind = where((out_in_sequence == k).all(axis=1))[0]
                new_ind = permutation(k_ind)
                out_strength[k_ind] = out_strength[new_ind]
                in_strength[k_ind] = in_strength[new_ind]

        from numpy import mean
        mean_k = mean([out_degree, in_degree])
        mean_s = mean([out_strength, in_strength])

        for e in G.es:
            e["weight"] = ((mean_k / mean_s) * out_strength[e.source] *
                           in_strength[e.target] /
                           (out_degree[e.source] * in_degree[e.target]))
        return G

    elif preserve_mode in ['out', 1]:
        preserve_mode = 1
    elif preserve_mode in ['in', 2]:
        preserve_mode = 2

    ind = [g.incident(v, mode=preserve_mode) for v in range(g.vcount())]
    flat = [int(e) for edges in ind for e in edges]
    weights = g.es[flat]['weight']
    # Shuffle each vertex's incident edges in place, then reassign the original
    # weights to the shuffled edge order.
    from numpy.random import shuffle
    for edges in ind:
        shuffle(edges)
    flat_shuffled = [int(e) for edges in ind for e in edges]
    G.es[flat_shuffled]['weight'] = weights

    return G
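
A small sketch exercising the dense-matrix branch above (illustrative only): for a fully connected weighted matrix with a zero diagonal, the returned estimate keeps the original sparsity pattern while replacing each weight with its strength-based expectation.

import numpy as np

A = np.random.rand(6, 6)
np.fill_diagonal(A, 0.0)
G = preserve_strength(A, permute_strength=False)
assert G.shape == A.shape
assert np.array_equal(G > 0, A > 0)  # zeros stay exactly where A has zeros
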
Example #46
def optimize(cfg, data):

    #################################
    '''      configurations       '''
    #################################

    print('\n\n*** Configuring')
    # choosing GPU device
    #os.environ['CUDA_VISIBLE_DEVICES'] = '0, 1'
    session_config = tf.ConfigProto()
    session_config.gpu_options.visible_device_list = '0'  # which GPUs to use

    print('\nConfigs:\n', cfg)

    # network
    k = cfg['k']
    alpha = cfg['alpha']

    # optimization
    learning_rate = cfg['learning_rate']
    beta = cfg['beta']
    batch_size = cfg['batch_size']
    break_thresh = cfg['break_thresh']
    training_epochs = cfg['training_epochs']

    # meta
    cfg_name = cfg['name']
    save_step = cfg['save_step']
    print_step = cfg['print_step']
    train_acc_checkpoints = cfg['train_acc_checkpoints']

    # paths
    data_path = '../data/'
    res_path = cfg['res_path']
    logs_path = res_path + 'logs/' + cfg_name + '/'
    data_name = res_path + cfg_name + '_data'
    res_name = res_path + cfg_name + '_res'
    ckpt_name = res_path + cfg_name + '_model'
    #model_name = 'C:/Users/Shira/Documents/TF/model_1_' + cfg_name + '.ckpt'

    np.save(data_name, data)

    print('\n*** Preparing data\n')
    x_train = data['x_train']
    y_train = data['y_train']
    x_test = data['x_test']
    y_test = data['y_test']
    n_train = x_train.shape[0]
    dim = x_train.shape[1]
    n_test = x_test.shape[0]

    n_all = n_train + n_test
    n_classes = 2
    if (n_train % batch_size != 0):
        print("\n*** Warning! batch size doesn't divide n_train *** \n")
        input("Press enter to continue")
    total_batch = int(n_train / batch_size)
    vi = 1 / np.sqrt(2 * k)
    w_max = 1 / np.sqrt(2 * k)

    print('\n*** Configured according to', cfg_name)
    # array for holding the accuracy results
    n_max_epochs = training_epochs + 1
    avg_costs = np.zeros(n_max_epochs)
    G_w = np.zeros(n_max_epochs)
    F_w = np.zeros(n_max_epochs)

    nzs_per_epoch = []
    iszero_list = []
    train_acc_list = []
    test_acc_list = []
    w_learned_list = []

    print('\n*** Building Computation Graph')

    # tf Graph Input
    x = tf.placeholder(tf.float32, [None, dim], name='InputData')
    y = tf.placeholder(tf.float32, [None], name='LabelData')

    weights = {}
    weights_out = tf.concat((vi * tf.ones(k), -vi * tf.ones(k)), axis=0)

    weights_init = rn.normal(0, 1, [dim, 2 * k])
    for i in range(2 * k):
        weights_init[:, i] /= np.sqrt(np.sum(weights_init[:, i]**2))
    weights_init *= w_max

    def degenerate_multilayer_perceptron(x, weights, alpha):
        layer = x
        n_curr = dim
        w_name = 'w0'
        weights[w_name] = tf.Variable(tf.cast(weights_init, tf.float32),
                                      name=w_name)
        layer = tf.matmul(layer, weights[w_name])
        layer = leaky_relu(layer, alpha)
        # Output layer
        out_layer = tf.tensordot(layer, weights_out, axes=1)
        return out_layer

    def leaky_relu(x, alpha):
        return tf.nn.relu(x) - alpha * tf.nn.relu(-x)

    # Encapsulating all ops into scopes, making Tensorboard's Graph
    # Visualization more convenient
    with tf.name_scope('Model'):
        # Build model
        pred = degenerate_multilayer_perceptron(x, weights, alpha)

    with tf.name_scope('Loss'):
        loss = tf.losses.hinge_loss(labels=y, logits=pred)

    with tf.name_scope('SGD'):
        # Gradient Descent
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        # Op to calculate every variable gradient
        grads = tf.gradients(loss, tf.trainable_variables())
        grads_and_vars = list(zip(grads, tf.trainable_variables()))
        # Op to update all variables according to their gradient
        apply_grads = optimizer.apply_gradients(grads_and_vars=grads_and_vars)

    with tf.name_scope('Accuracy'):
        # Accuracy
        acc = tf.equal(tf.sign(pred), tf.sign(y - .5))
        acc = tf.reduce_mean(tf.cast(acc, tf.float32))

    # Initialize the variables (i.e. assign their default value)
    init = tf.global_variables_initializer()

    #saver = tf.train.Saver([weights['w0']])

    # Start training
    with tf.Session(config=session_config) as sess:

        print('\n*** Training')
        # Run the initializer
        sess.run(init)

        epoch = 0

        # Training cycle
        for epoch in range(training_epochs):

            avg_cost = 0.
            epoch_perm = rn.permutation(n_train)

            nzs_per_epoch.append([0])

            first_step_in_epoch = True

            for ind in epoch_perm:

                #save_path = saver.save(sess, ckpt_name)
                if cfg['save_sbs'] or first_step_in_epoch:  #and epoch < lim_save_sbs
                    train_acc = acc.eval({x: x_train, y: y_train})
                    test_acc = acc.eval({x: x_test, y: y_test})
                    train_acc_list.append(train_acc)
                    test_acc_list.append(test_acc)
                    w_learned_list.append(np.array(weights['w0'].eval()).T)

                batch_xs, batch_ys = x_train[ind, :].reshape(
                    -1, dim), y_train[ind].reshape(1)

                # Run optimization op (backprop), cost op (to get loss value)
                _, loss_on_batch = sess.run([apply_grads, loss],
                                            feed_dict={
                                                x: batch_xs,
                                                y: batch_ys
                                            })

                # Compute average loss
                avg_cost += loss_on_batch / total_batch
                zs = np.int8(loss_on_batch == 0)
                nzs_per_epoch[-1] += 1 - zs

                first_step_in_epoch = False

            avg_costs[epoch] = avg_cost

            ind_cp = len(train_acc_checkpoints) - 1
            while ind_cp >= 0:
                if (train_acc > train_acc_checkpoints[ind_cp]):
                    print(
                        '\n\n!!! checkpoint: {:.3f}\n    epoch: {}\n    train acc: {:.3f}\n    test acc: {:.3f}\n'
                        .format(train_acc_checkpoints[ind_cp], epoch,
                                train_acc, test_acc))
                    train_acc_checkpoints = train_acc_checkpoints[ind_cp + 1:]
                    break
                else:
                    ind_cp -= 1

            stopping = (train_acc >= break_thresh)

            if (epoch / print_step == 100):
                print_step *= 10
                print(
                    '\nprint step grows by a factor of 10 and is now equal to',
                    print_step)

            if (epoch % print_step == 0) or (epoch < 10) or stopping:
                print('\n\nEpoch: {}'.format(epoch))
                print(
                    'Before training on epoch, train Accuracy: {:.3f}'.format(
                        train_acc))
                print('Test Accuracy: {:.3f}'.format(test_acc))
                print('While training, cost =', '{:.9f}'.format(avg_cost),
                      '({:.3f})'.format(np.exp(-avg_cost)))
                print('Number of non-zero steps (all): ', nzs_per_epoch[-1])
            else:
                print('{}, '.format(epoch), end='')

            if (epoch % save_step == 0) or stopping:
                print('\n*** Saving')
                ind_try = 0
                while True:
                    ind_try += 1
                    try:
                        np.savez(res_name,
                                 avg_costs=avg_costs[:epoch + 1],
                                 x_train=x_train,
                                 y_train=y_train,
                                 nzs_per_epoch=nzs_per_epoch,
                                 train_acc_list=train_acc_list,
                                 test_acc_list=test_acc_list,
                                 w_learned_list=w_learned_list,
                                 config=cfg)
                        break
                    except PermissionError:
                        print('\n#' * 20, end='')
                        print(
                            '\n<<< Saving attempt {} failed, trying again >>> \n'
                            .format(ind_try))
                    except KeyboardInterrupt:
                        print('\n<<< Simulation interrupted, cannot save')
                        stopping = True
                        break

            if stopping:
                print(
                    '\n*** Epoch: {}\n    Training reached {} threshold and is stopping'
                    .format(epoch, break_thresh))
                break

            # print('\n*** Saving model')
            # saver.save(sess, model_name)
            # #saver_b.save(sess, biases_name)

        print('\n*** Optimization Finished!')
        print('\n*** Configured according to', cfg_name)
        print('Configs:\n', cfg)
        # Calculate accuracy
        print('*** Train Accuracy: {:.3f}'.format(
            acc.eval({
                x: x_train,
                y: y_train
            })))
        print('*** Accuracy: {:.3f}'.format(acc.eval({x: x_test, y: y_test})))

        print('\n*** Run the command line:' \
              '\n      --> tensorboard --logdir=', logs_path, \
              '\n      Then open http://0.0.0.0:6006/ into your web browser\n')

        return
Example #47
import numpy as np
from numpy import random

if __name__ == '__main__':
    rand1 = random.choice([3, 5, 7, 11, 13],
                          p=[0.1, 0.2, 0.2, 0.45, 0.05],
                          size=(5, 5))
    print("\nRandom primes:\n", rand1)

    arr = np.arange(1, 10)
    random.shuffle(arr)
    print("\nRandom shuffle:\n", arr)
    print("\nRandom permutation:\n", random.permutation(np.arange(1, 10)))
Example #48
 def swap_channels(self, img):
     # Apply channel swap
     if random.randint(2):
         img = img[..., random.permutation(3)]
     return img
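
The same channel-swap augmentation written standalone on a dummy image (random in the method above is assumed to be numpy.random, as the randint(2)/permutation(3) calls suggest):

import numpy as np

img = np.arange(2 * 2 * 3).reshape(2, 2, 3)   # dummy H x W x C image
if np.random.randint(2):                      # apply the swap about half the time
    img = img[..., np.random.permutation(3)]  # reorder the three color channels
print(img.shape)                              # still (2, 2, 3)
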
from itertools import permutations
from numpy import chararray, arange, concatenate
from numpy.random import permutation

All = open('sample100.txt','r+')
line = All.readlines()

L = len(line)
l = L//2
M = chararray((l,2))
M = chararray(M.shape, itemsize=1000)
M[:] = 'NA'

i = 0
j = 0
while i < L:
    M[j,0] = line[i]
    M[j,1] = line[i+1]
    i += 2
    j += 1

srs = permutation(arange(l))
Rand = M[srs,:]
Rand = concatenate(Rand)

j = 0
file = open("random_sample1000.txt","w")
while j < len(Rand):
    file.write("%s" %Rand[j].decode('utf-8') + '\n')
    j += 1
file.close()
Example #50
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--save', type=str, default='work/icnn')
    parser.add_argument('--nEpoch', type=int, default=100)
    parser.add_argument('--trainBatchSz', type=int, default=128)
    parser.add_argument('--layerSizes',
                        type=int,
                        nargs='+',
                        default=[600, 600])
    # parser.add_argument('--testBatchSz', type=int, default=2048)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--data', type=str)
    parser.add_argument('--valSplit', type=float, default=0)
    parser.add_argument('--noncvx', action='store_true')
    parser.add_argument('--inference_lr', type=float, default=0.01)
    parser.add_argument('--inference_momentum', type=float, default=0.5)
    parser.add_argument('--inference_nIter', type=int, default=10)

    args = parser.parse_args()

    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    save = os.path.expanduser(args.save)
    if not os.path.isdir(save):
        os.makedirs(save, exist_ok=True)

    if args.data:
        print("Loading data from: ", args.data)
        with open(args.data, 'rb') as f:
            data = pickle.load(f)
    else:
        data = bibsonomy.loadBibtex("data/bibtex")

    nTest = data['testX'].shape[0]
    nFeatures = data['trainX'].shape[1]
    nLabels = data['trainY'].shape[1]
    nXy = nFeatures + nLabels

    nTrain_orig = data['trainX'].shape[0]
    nVal = int(args.valSplit * nTrain_orig)
    nTrain = nTrain_orig - nVal
    if args.valSplit > 0:
        I = npr.permutation(nTrain_orig)
        trainI, valI = I[:nTrain], I[nTrain:]
        trainX = data['trainX'][trainI, :]
        trainY = data['trainY'][trainI, :]
        valX = data['trainX'][valI, :]
        valY = data['trainY'][valI, :]
    else:
        trainX = data['trainX']
        trainY = data['trainY']

    print("\n\n" + "=" * 40)
    print("+ nTrain: {}, nTest: {}".format(nTrain, nTest))
    print("+ nFeatures: {}, nLabels: {}".format(nFeatures, nLabels))
    print("=" * 40 + "\n\n")

    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = Model(nFeatures, nLabels, args, sess)
        if args.valSplit > 0:
            model.train(args, trainX, trainY, valX, valY)
        else:
            model.train(args, trainX, trainY, data['testX'], data['testY'])
Example #51
def shuffle(a):
    p = rand.permutation(len(a))
    shuffled = a[p]
    #print(a, shuffled)
    return shuffled
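
The index-permutation form above is also what you want when two arrays must be shuffled in unison, e.g. features and labels. A small illustrative sketch, with rand as the numpy.random alias used above:

import numpy as np
import numpy.random as rand

X = np.arange(10).reshape(5, 2)
y = np.arange(5)
p = rand.permutation(len(y))
X_shuf, y_shuf = X[p], y[p]                 # rows of X stay aligned with y
assert np.array_equal(X_shuf[:, 0] // 2, y_shuf)
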
Example #52
# Helper function to plot images by index in the validation set:

# In[ ]:


def plots_idx(idx, titles=None):
    plots([image.load_img(path + 'valid/' + filenames[i]) for i in idx],
          titles=titles)


# In[ ]:

#1. A few correct labels at random
correct = np.where(preds == val_labels[:, 1])[0]
idx = permutation(correct)[:n_view]
plots_idx(idx, probs[idx])

# In[ ]:

#2. A few incorrect labels at random
incorrect = np.where(preds != val_labels[:, 1])[0]
idx = permutation(incorrect)[:n_view]
plots_idx(idx, probs[idx])

# In[ ]:

#3. The images we most confident were cats, and are actually cats
correct_cats = np.where((preds == 0) & (preds == val_labels[:, 1]))[0]
most_correct_cats = np.argsort(probs[correct_cats])[::-1][:n_view]
plots_idx(correct_cats[most_correct_cats],
Example #53
 def _shuffle(self):
     self._perm = npr.permutation(np.arange(self._len))
     self._cur = 0
Example #54
 def sample_df(df, pct=0.1, nr=100):
     ''' Randomly sample rows: take fraction pct of the DataFrame, but at least nr rows '''
     a = max(int(pct * df.shape[0]), int(nr))
     return df.loc[permutation(df.index)[:a], :]
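
A hypothetical usage of the sampler above, treating sample_df as a plain function and assuming pandas is available and permutation is imported from numpy.random, as the call implies:

import numpy as np
import pandas as pd

df = pd.DataFrame({"x": np.arange(1000), "y": np.arange(1000) % 7})
sampled = sample_df(df, pct=0.1, nr=100)
print(len(sampled))   # 100 rows: max(10% of 1000, 100), drawn without replacement
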
Example #55
def entropy(points, logp, N_entropy=10000, N_norm=2500):
    r"""
    Return entropy estimate and uncertainty from a random sample.

    *points* is a set of draws from an underlying distribution, as returned
    by a Markov chain Monte Carlo process for example.

    *logp* is the log-likelihood for each draw.

    *N_norm* is the number of points $k$ to use to estimate the posterior
    density normalization factor $P(D) = \hat N$, converting
    from $\log( P(D|M) P(M) )$ to $\log( P(D|M)P(M)/P(D) )$. The relative
    uncertainty $\Delta\hat S/\hat S$ scales with $\sqrt{k}$, with the
    default *N_norm=2500* corresponding to 2% relative uncertainty.
    Computation cost is $O(nk)$ where $n$ is number of points in the draw.

    *N_entropy* is the number of points used to estimate the entropy
    $\hat S = - \int P(M|D) \log P(M|D)$ from the normalized log likelihood
    values.
    """

    # Use a random subset to estimate density
    if N_norm >= len(logp):
        norm_points = points
    else:
        idx = permutation(len(points))[:N_norm]
        norm_points = points[idx]

    # Use a different subset to estimate the scale factor between density
    # and logp.
    if N_entropy >= len(logp):
        entropy_points, eval_logp = points, logp
    else:
        idx = permutation(len(points))[:N_entropy]
        entropy_points, eval_logp = points[idx], logp[idx]
    """
    # Try again, just using the points from the high probability regions
    # to determine the scale factor
    N_norm = min(len(logp), 5000)
    N_entropy = int(0.8*N_norm)
    idx = np.argsort(logp)
    norm_points = points[idx[-N_norm:]]
    entropy_points = points[idx[-N_entropy:]]
    eval_logp = logp[idx[-N_entropy:]]
    """

    # Normalize p to a peak probability of 1 so that exp() doesn't underflow.
    #
    # This should be okay since for the normalizing constant C:
    #
    #      u' = e^(ln u + ln C) = e^(ln u)e^(ln C) = C u
    #
    # Using eq. 11 of Kramer with u' substituted for u:
    #
    #      N_est = < u'/p > = < C u/p > = C < u/p >
    #
    #      S_est = - < ln q >
    #            = - < ln (u'/N_est) >
    #            = - < ln C + ln u - ln (C <u/p>) >
    #            = - < ln u + ln C - ln C - ln <u/p> >
    #            = - < ln u - ln <u/p> >
    #            = - < ln u > + ln <u/p>
    #
    # Uncertainty comes from eq. 13:
    #
    #      N_err^2 = 1/(k-1) sum( (u'/p - <u'/p>)^2 )
    #              = 1/(k-1) sum( (C u/p - <C u/p>)^2 )
    #              = C^2 std(u/p)^2
    #      S_err = std(u'/p) / <u'/p> = (C std(u/p))/(C <u/p>) = std(u/p)/<u/p>
    #
    # So even though the constant C shows up in N_est, N_err, it cancels
    # again when S_est, S_err is formed.
    log_scale = max(eval_logp)
    # print("max log sample: %g"%log_scale)
    eval_logp -= log_scale

    # Compute entropy and uncertainty in nats
    rho = density(norm_points, entropy_points)
    frac = exp(eval_logp) / rho
    n_est, n_err = mean(frac), std(frac)
    s_est = log(n_est) - mean(eval_logp)
    s_err = n_err / n_est
    #print(n_est, n_err, s_est/LN2, s_err/LN2)
    ##print(np.median(frac), log(np.median(frac))/LN2, log(n_est)/LN2)
    if False:
        import pylab
        idx = pylab.argsort(entropy_points[:, 0])
        pylab.figure()
        pylab.subplot(221)
        pylab.hist(points[:, 0], bins=50, normed=True, log=True)
        pylab.plot(entropy_points[idx, 0], rho[idx], label='density')
        pylab.plot(entropy_points[idx, 0],
                   exp(eval_logp + log_scale)[idx],
                   label='p')
        pylab.ylabel("p(x)")
        pylab.legend()
        pylab.subplot(222)
        pylab.hist(points[:, 0], bins=50, normed=True, log=False)
        pylab.plot(entropy_points[idx, 0], rho[idx], label='density')
        pylab.plot(entropy_points[idx, 0],
                   exp(eval_logp + log_scale)[idx],
                   label='p')
        pylab.ylabel("p(x)")
        pylab.legend()
        pylab.subplot(212)
        pylab.plot(entropy_points[idx, 0], frac[idx], '.')
        pylab.xlabel("P[0] value")
        pylab.ylabel("p(x)/kernel density")

    # return entropy and uncertainty in bits
    return s_est / LN2, s_err / LN2
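
A rough consistency check (sketch only; it relies on the module-level density() estimator and LN2 constant that the function above uses): for draws from a one-dimensional standard normal, the estimate should land near the analytic entropy 0.5*log2(2*pi*e), roughly 2.05 bits.

import numpy as np

points = np.random.randn(20000, 1)
logp = -0.5 * np.sum(points**2, axis=1) - 0.5 * np.log(2 * np.pi)
s_bits, s_err = entropy(points, logp)
print(s_bits, s_err)   # expect s_bits close to 2.05
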
Example #56
    def problem3(self, s):
        """Test LinkedList.__len__() and LinkedList.__str__(). 10 Points."""

        # LinkedList.__len__() (4 points) --------------------------------

        # Empty list
        l1 = [int(i) for i in randint(1, 60, randint(5, 10))]
        l2 = s.LinkedList()
        points = self._eqTest(
            0, len(l2),
            "LinkedList.__len__() failed on list {}".format(l1[:0]))

        # Single item
        l2.append(l1[0])
        points += self._eqTest(
            1, len(l2),
            "LinkedList.__len__() failed on list {}".format(l1[:1]))

        # Two items
        l2.append(l1[1])
        points += self._eqTest(
            2, len(l2),
            "LinkedList.__len__() failed on list {}".format(l1[:2]))

        # Many items
        for i in l1[2:]:
            l2.append(i)
        points += self._eqTest(
            len(l1), len(l2),
            "LinkedList.__len__() failed on list {}".format(l1))

        # LinkedList.__str__() (6 points) --------------------------------

        # Empty list
        l1 = [int(i) for i in randint(1, 60, randint(5, 10))]
        l2 = s.LinkedList()
        points += self._strTest(l1[:0], l2, "LinkedList.__str__() failed")

        # Single item (int)
        l2.append(l1[0])
        points += self._strTest(l1[:1], l2, "LinkedList.__str__() failed")

        # Two items (int)
        l2.append(l1[1])
        points += self._strTest(l1[:2], l2, "LinkedList.__str__() failed")

        # Many items (int)
        for i in l1[2:]:
            l2.append(i)
        points += self._strTest(l1, l2, "LinkedList.__str__() failed")

        # Single item (str)
        l1 = [str(i) for i in permutation(["a", "b", "c", "d", "e", "f"])]
        l2 = s.LinkedList()
        l2.append(l1[0])
        points += self._strTest(l1[:1], l2, "LinkedList.__str__() failed")

        # Many items (str)
        for i in l1[1:]:
            l2.append(i)
        points += self._strTest(l1, l2, "LinkedList.__str__() failed")

        return points
Example #57
def fit_js(data, log_p, max_epochs=20):
	"""
	Fit isotropic Gaussian by minimizing Jensen-Shannon divergence.
	"""

	# data dimensionality
	D = data.shape[0]

	# data and hidden states
	X = tt.dmatrix('X')
	Z = tt.dmatrix('Z')

	nr.seed(int(time() * 1000.) % 4294967295)
	idx = nr.permutation(data.shape[1])[:100]

	# initialize parameters
	b = th.shared(np.mean(data[:, idx], 1)[:, None], broadcastable=(False, True))
	a = th.shared(np.std(data[:, idx] - b.get_value()))

	# model density
	log_q = lambda X: -0.5 * tt.sum(tt.square((X - b) / a), 0) - D * tt.log(tt.abs_(a)) - D / 2. * np.log(2. * np.pi)

	G = lambda Z: a * Z + b

	# Jensen-Shannon divergence
	JSD = tt.mean(tt.log(tt.nnet.sigmoid(log_p(X) - log_q(X)))) \
		+ tt.mean(tt.log(tt.nnet.sigmoid(log_q(G(Z)) - log_p(G(Z)))))
	JSD = (JSD + np.log(4.)) / 2.

	# function computing JSD and its gradient
	f_jsd = th.function([Z, X], [JSD, th.grad(JSD, a), th.grad(JSD, b)])

	# SGD hyperparameters
	B = 200
	mm = 0.8
	lr = .5

	da = 0.
	db = 0.

	try:
		# display initial JSD
		print('{0:>4} {1:.4f}'.format(0, float(f_jsd(nr.randn(*data.shape), data)[0])))

		for epoch in range(max_epochs):
			values = []

			# stochastic gradient descent
			for t in range(0, data.shape[1], B):
				Z = nr.randn(D, B)
				Y = data[:, t:t + B]

				v, ga, gb = f_jsd(Z, Y)
				da = mm * da - lr * ga
				db = mm * db - lr * gb

				values.append(v)

				a.set_value(a.get_value() + da)
				b.set_value(b.get_value() + db)

			# reduce learning rate
			lr /= 2.

			# display estimated JSD
			print('{0:>4} {1:.4f}'.format(epoch + 1, np.mean(values)))

	except KeyboardInterrupt:
		pass

	return a.get_value() * np.eye(D), b.get_value()
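
For reference, the sigmoid form assembled above really is the Jensen-Shannon divergence: since $\sigma(\log p - \log q) = p/(p+q)$, the two expectation terms sum to $E_p[\log\frac{p}{p+q}] + E_q[\log\frac{q}{p+q}] = KL(p\|m) + KL(q\|m) - \log 4$ with $m = \frac{1}{2}(p+q)$, so adding $\log 4$ and halving (the (JSD + np.log(4.)) / 2. line) recovers $JSD(p\|q) = \frac{1}{2}KL(p\|m) + \frac{1}{2}KL(q\|m)$.
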
# Generate training data for y = class3
# number of samples
dataNumber_y3 = Y_class3
# mean of the data
mu_y3 = [10, 10, 10, 10, 10]
# how spread out the data is (variance)
variance_y3 = 4
# draw the random samples
data_y3 = multivariate_normal(mu_y3, np.eye(5) * variance_y3, dataNumber_y3)
df_y3 = pd.DataFrame(data_y3, columns=['x1', 'x2', 'x3', 'x4', 'x5'])
df_y3['y'] = 'class3'

# Combine the generated data into a single DataFrame
df = pd.concat([df_y1, df_y2, df_y3], ignore_index=True)
# Shuffle the rows so the classes are mixed
df_totalTrainData = df.reindex(permutation(df.index)).reset_index(drop=True)

# Inspect the training data
print("===== Data =====>")
print(df_totalTrainData.head())
print(df_totalTrainData.tail())
# Check the shape of the training data
print("df_totalTrainData Shape : {}\n".format(df_totalTrainData.shape))

# Plot the full training data
sns.pairplot(df_totalTrainData, hue="y", height=2)
plt.show()

### (2) Declare the mapping for the categorical y column
# Map the y column's string labels to numeric values
y_mapping = {
    return wrapper


# imports needed by this snippet (List type hint and permutation below)
from typing import List
from numpy.random import permutation


class Solution:
    def insertSort(self, ls: List[int], replace=False) -> List[int]:
        if not replace:
            nums = ls.copy()
        else:
            nums = ls
        Len = len(nums)
        if Len <= 1:
            return nums
        for i in range(1, Len):
            key = nums[i]
            j = i - 1
            # Shift larger elements one slot to the right, then insert key.
            while j >= 0 and nums[j] > key:
                nums[j + 1] = nums[j]
                j -= 1
            nums[j + 1] = key

        return nums


ls = permutation([i for i in range(10)])
for i in range(1):
    ls_sorted = Solution().insertSort(ls)
print(f'original list:\t{ls}')
print(f'sorted list:\t{ls_sorted}')
Example #60
	def sample_other(other, S, F, n_rep, n_fix):
		fixated = np.nonzero(other)[0]
		indexer = list(map(lambda x: random.permutation(x)[:n_fix], np.tile(range(len(fixated)), [n_rep, 1])))
		r = fixated[np.transpose(indexer)]
		S_rand = S[r] # Saliency map values at random locations (including fixated locations!? underestimated)
		return S_rand