Example 1
import numpy as np

def batch_iter(data, batch_size, num_epochs, seed=None, fill=False):
    """
    Generates a batch iterator for a dataset.
    """
    random = np.random.RandomState(seed)
    data = np.array(data)
    data_length = len(data)
    num_batches_per_epoch = data_length // batch_size
    if data_length % batch_size != 0:
        num_batches_per_epoch += 1
    for epoch in range(num_epochs):
        # Shuffle the data at each epoch
        shuffle_indices = random.permutation(np.arange(data_length))
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_length)
            selected_indices = shuffle_indices[start_index:end_index]
            # If we don't have enough data left for a whole batch, fill it randomly
            if fill and end_index >= data_length:
                num_missing = batch_size - len(selected_indices)
                selected_indices = np.concatenate([
                    selected_indices,
                    random.randint(0, data_length, num_missing)
                ])
            yield data[selected_indices]
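A minimal usage sketch (toy data, hypothetical values):

data = list(range(10))
for batch in batch_iter(data, batch_size=4, num_epochs=1, seed=0, fill=True):
    print(batch)  # three batches of 4; the last is padded with random re-draws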
Example 2
def random_selections(data, n=999999999):
    """Iterate through random permutations of the given data.
    Each "epoch" contains all the samples exactly once."""
    count = 0
    while True:
        selection = random.permutation(len(data))
        for index in selection:
            if count >= n:
                return
            yield data[index]
            count += 1
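This relies on numpy's random (the standard-library random module has no permutation). A minimal usage sketch:

from numpy import random

for sample in random_selections(['a', 'b', 'c'], n=5):
    print(sample)  # 5 draws: one full shuffled epoch plus 2 items of the next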
Example 3
    def get_pls_scores_permutation(self,
                                   gArray,
                                   mArray,
                                   gSizes,
                                   mSizes,
                                   numPermutation=NUM_PERMUTATION):
        '''
        g and m from legacy code, no particular meaning

        ???
        Permutation is done within each data slice, because the data
        characteristics differ across time points, deltas, etc.
        '''
        SampleNumber = self.SampleNumber
        PLS = PLSRegression(n_components=3)
        scores = []
        for jj in range(numPermutation):
            if jj % 10 == 0:
                print("            Permutation --- %d" % jj)

            for g in gSizes:
                matrix1 = []
                for ii in range(g):
                    matrix1.append(permutation(gArray, SampleNumber))
                matrix1 = np.array(matrix1).T
                for m in mSizes:
                    matrix2 = []
                    for ii in range(m):
                        matrix2.append(permutation(mArray, SampleNumber))
                    matrix2 = np.array(matrix2).T
                    if matrix1.shape[1] > matrix2.shape[1]:
                        PLS.fit(matrix1, matrix2)
                        PLSscore = PLS.score(matrix1, matrix2)
                    else:
                        PLS.fit(matrix2, matrix1)
                        PLSscore = PLS.score(matrix2, matrix1)

                    scores.append(PLSscore)

        return scores
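The permutation helper and SampleNumber come from the surrounding legacy class. A standalone sketch of the same null-distribution idea, with hypothetical shapes and plain scikit-learn:

import numpy as np
from sklearn.cross_decomposition import PLSRegression

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 8))   # hypothetical "g" block
Y = rng.normal(size=(50, 5))   # hypothetical "m" block

pls = PLSRegression(n_components=3)
null_scores = []
for _ in range(100):
    # shuffle samples independently in each block to break any X-Y association
    Xp = X[rng.permutation(len(X))]
    Yp = Y[rng.permutation(len(Y))]
    pls.fit(Xp, Yp)
    null_scores.append(pls.score(Xp, Yp))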
Example 4
    def train(self, X, Y):
        """ X: matrix of dimensions n x indim 
            y: column vector of dimension n x 1 """

        # choose random center vectors from training set
        rnd_idx = random.permutation(X.shape[0])[:self.numCenters]
        self.centers = [X[i, :] for i in rnd_idx]
        # calculate activations of RBFs
        G = self._calcAct(X)

        # calculate output weights (pseudoinverse)
        self.W = dot(pinv(G), Y)
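The pseudoinverse line is simply the least-squares solution for the output weights; a quick standalone check (shapes hypothetical, assuming numpy):

import numpy as np

G = np.random.rand(10, 4)
Y = np.random.rand(10, 1)
W = np.linalg.pinv(G) @ Y                     # what train() computes above
W_ls, *_ = np.linalg.lstsq(G, Y, rcond=None)  # ordinary least squares
assert np.allclose(W, W_ls)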
Example 5
from numpy import random
import pandas as pd
from sklearn.model_selection import ShuffleSplit

def univariate_permutations(p_model,
                            p_features,
                            p_X_train,
                            p_Y_train,
                            p_hyperparams=None,
                            p_cross_validations=5,
                            p_validation_fraction=0.25,
                            p_seed=0,
                            p_verbose=False):
    # TODO Add in p_regression and p_metric pieces
    # TODO since we're doing cross-validation here, make X_train a larger portion of the overall dataset (80-90%)

    scores = []
    rs = ShuffleSplit(n_splits=p_cross_validations, test_size=p_validation_fraction, random_state=p_seed)

    # cross-validate the scores on a number of different random splits of the data
    for j, (train_index, test_index) in enumerate(rs.split(p_X_train)): # loop through the n_splits train/test splits
        if p_verbose:
            print('Iteration number {} for the train test split'.format(j+1))
        Xt_train, Xt_test = p_X_train.iloc[train_index, :], p_X_train.iloc[test_index, :] # set X for train and test
        Yt_train, Yt_test = p_Y_train.iloc[train_index], p_Y_train.iloc[test_index] # set Y for train and test
        p_model.fit(Xt_train, Yt_train, p_hyperparams) # fit the model on the training set
        score = p_model.score(Xt_test, Yt_test)
        for i, (feature_name, index) in enumerate(p_features): # shuffle the ith predictor variable (to break any relationship with the target)
            X_t = Xt_test.copy() # copy the test data, so as not to disturb
            random.seed(p_seed) # set seed for reproducibility
            X_t.iloc[:, index] = random.permutation(X_t.iloc[:, index]) # Permute the observations from the ith variable
            shuff_score = p_model.score(X_t, Yt_test)
            scores.append([feature_name, index, (shuff_score - score) / shuff_score])
            if p_verbose:
                if i == 0:
                    print('{:*^65}'.format('Looping Through Remaining Variables'))
                print('Testing {}. {}: {:.5f}'.format(i+1, feature_name, (shuff_score - score) / shuff_score))
    print('{:*^65}'.format("Features sorted by their score:"))

    df = pd.DataFrame(columns=['Variable', 'Var_Index', 'Score'], data=scores)
    df['Var_Index'] = df['Var_Index'].apply(lambda x: tuple(x)) # change list to tuple in order to get hashable type
    df_agg = df.groupby(['Variable', 'Var_Index'], as_index=False)['Score'].mean() # Average the scores
    df_agg['Var_Index'] = df_agg['Var_Index'].apply(lambda x: list(x)) # Probably don't need to convert back to list...
    df_sorted = df_agg.sort_values(by='Score', ascending=False).reset_index(drop=True)

    for i in range(len(df_sorted)):
        print('{}: {:.5f}'.format(df_sorted['Variable'][i], df_sorted['Score'][i]))
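A usage sketch; the wrapper class below is hypothetical, written only to match the fit(X, y, hyperparams) / score(X, y) interface the function expects:

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

class WrappedModel:
    # hypothetical adapter: the function calls fit() with a hyperparameter dict
    def fit(self, X, y, hyperparams=None):
        self.est = RandomForestRegressor(**(hyperparams or {}))
        self.est.fit(X, y)

    def score(self, X, y):
        return self.est.score(X, y)

rng = np.random.RandomState(0)
X = pd.DataFrame(rng.normal(size=(200, 3)), columns=['a', 'b', 'c'])
y = pd.Series(2 * X['a'] + rng.normal(size=200))
features = [('a', [0]), ('b', [1]), ('c', [2])]  # (name, column-index list)
univariate_permutations(WrappedModel(), features, X, y)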
Example 6
 def __call__(self, sample):
     # parse and assert
     image = sample['image'].copy()
     image = image.astype(np.float32)
     # random brightness
     if random.randint(2):
         delta = random.uniform(-self.brightness_delta,
                                self.brightness_delta)
         image += delta
     # mode == 0 means do random contrast first and mode == 1 means do random contrast last
     mode = random.randint(2)
     if mode == 1:
         if random.randint(2):
             alpha = random.uniform(self.contrast_lower,
                                    self.contrast_upper)
             image *= alpha
     # convert color from BGR to HSV
     image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
     # random saturation
     if random.randint(2):
         image[..., 1] *= random.uniform(self.saturation_lower,
                                         self.saturation_upper)
     # random hue
     if random.randint(2):
         image[..., 0] += random.uniform(-self.hue_delta, self.hue_delta)
         image[..., 0][image[..., 0] > 360] -= 360
         image[..., 0][image[..., 0] < 0] += 360
     # convert color from HSV to BGR
     image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
     # random contrast
     if mode == 0:
         if random.randint(2):
             alpha = random.uniform(self.contrast_lower,
                                    self.contrast_upper)
             image *= alpha
     # randomly swap channels
     if random.randint(2):
         image = image[..., random.permutation(3)]
     # update and return sample
     sample['image'] = image
     return sample
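This transform assumes numpy's random (from numpy import random) and OpenCV. The final channel-swap trick on its own:

import numpy as np

image = np.arange(12, dtype=np.float32).reshape(2, 2, 3)
swapped = image[..., np.random.permutation(3)]  # reorder the 3 channels randomly
print(swapped.shape)  # (2, 2, 3): same pixels, channel order shuffled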
Example 8
    def getSmallTrainingInput(self, inputSize):
        smallTrainingSet = []
        smallTrainingLabels = []
        
        imageCountDict = {'butler': 0, 'radcliffe': 0, 'bartan': 0, 'bracco': 0, 'gilpin': 0, 'harmon': 0}
        indexToLastNameDict = {0: 'butler', 1: 'radcliffe', 2: 'bartan', 3: 'bracco', 4: 'gilpin', 5: 'harmon'}

        trainingSize = len(self.trainingSet)
        randomIndexArray = random.permutation(trainingSize)
        maxSizePerPerson = inputSize // 6  # integer cap per person
        for i in randomIndexArray:
            lastNameIndex = self.trainingLabels[i].tolist().index(1)
            lastName = indexToLastNameDict[lastNameIndex]
            if imageCountDict[lastName] < maxSizePerPerson:
                smallTrainingSet.append(self.trainingSet[i])
                smallTrainingLabels.append(self.trainingLabels[i])
                imageCountDict[lastName] += 1

        smallTrainingSet = np.asarray(smallTrainingSet)
        smallTrainingLabels = np.asarray(smallTrainingLabels)

        return smallTrainingSet, smallTrainingLabels
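The same balanced-subsampling idea as a standalone sketch (hypothetical data; integer labels instead of the one-hot rows used above):

import numpy as np

def balanced_subset(X, y, per_class, seed=0):
    rng = np.random.RandomState(seed)
    counts = {}
    keep = []
    # walk a shuffled index order, capping each class at per_class items
    for i in rng.permutation(len(X)):
        if counts.get(y[i], 0) < per_class:
            keep.append(i)
            counts[y[i]] = counts.get(y[i], 0) + 1
    return X[keep], y[keep]

X = np.arange(20).reshape(10, 2)
y = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2])
Xs, ys = balanced_subset(X, y, per_class=2)  # at most 2 rows per label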
Example 10
 def __init__(self, dataset):
     self.dataset = dataset
     random = np.random.RandomState(seed=12345)
     self.perm = random.permutation(len(dataset))[:500]
Example 11
 def random_shuffle(self, random_seed: Optional[int]) -> "pyarrow.Table":
     random = np.random.RandomState(random_seed)
     return self.take(random.permutation(self.num_rows()))
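take and num_rows here are methods of the surrounding wrapper class; with a bare pyarrow.Table the same shuffle looks like this (note num_rows is a property there):

import numpy as np
import pyarrow as pa

table = pa.table({'x': [1, 2, 3, 4, 5]})
rng = np.random.RandomState(42)
shuffled = table.take(rng.permutation(table.num_rows))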
Example 12
from numpy import (log2, histogram, percentile, where, ones, around, r_,
                   tile, linspace, newaxis, random)

def fit_CV(mu,
           cv,
           fit_method='Exp',
           svr_gamma=0.06,
           x0=[0.5, 0.5],
           verbose=False):
    '''Fits a noise model (CV vs mean)
    Parameters
    ----------
    mu: 1-D array
        mean of the genes (raw counts)
    cv: 1-D array
        coefficient of variation for each gene
    fit_method: string
        allowed: 'SVR', 'Exp', 'binSVR', 'binExp'
        default: 'Exp' (the 'SVR' variants require scikit-learn)
        SVR: uses Support vector regression to fit the noise model
        Exp: Parametric fit to cv = mu^(-a) + b
        bin: before fitting the distribution of mean is normalized to be
             uniform by downsampling and resampling.
    Returns
    -------
    score: 1-D array
        Score is the relative position with respect of the fitted curve
    mu_linspace: 1-D array
        x coordinates to plot (min(log2(mu)) -> max(log2(mu)))
    cv_fit: 1-D array
        y=f(x) coordinates to plot 
    pars: tuple or None
    
    '''
    log2_m = log2(mu)
    log2_cv = log2(cv)

    if len(mu) > 1000 and 'bin' in fit_method:
        # histogram with 30 bins
        n, xi = histogram(log2_m, 30)
        med_n = int(percentile(n, 50))  # int: used below for slicing and tiling
        for i in range(0, len(n)):
            # index of genes within the ith bin
            ind = where((log2_m >= xi[i]) & (log2_m < xi[i + 1]))[0]
            if len(ind) > med_n:
                #Downsample if count is more than median
                ind = ind[random.permutation(len(ind))]
                ind = ind[:len(ind) - med_n]
                mask = ones(len(log2_m), dtype=bool)
                mask[ind] = False
                log2_m = log2_m[mask]
                log2_cv = log2_cv[mask]
            elif (around(med_n / len(ind)) > 1) and (len(ind) > 5):
                # Duplicate if count is less than median
                reps = int(around(med_n / len(ind))) - 1  # tile needs an int count
                log2_m = r_[log2_m, tile(log2_m[ind], reps)]
                log2_cv = r_[log2_cv, tile(log2_cv[ind], reps)]
    elif 'bin' in fit_method:
        print('More than 1000 input features are needed for bin correction.')

    if 'SVR' in fit_method:
        try:
            from sklearn.svm import SVR
            if svr_gamma == 'auto':
                svr_gamma = 1000. / len(mu)
            #Fit the Support Vector Regression
            clf = SVR(gamma=svr_gamma)
            clf.fit(log2_m[:, newaxis], log2_cv)
            fitted_fun = clf.predict
            score = log2(cv) - fitted_fun(log2(mu)[:, newaxis])
            params = None
            #The coordinates of the fitted curve
            mu_linspace = linspace(min(log2_m), max(log2_m))
            cv_fit = fitted_fun(mu_linspace[:, newaxis])
            return score, mu_linspace, cv_fit, params

        except ImportError:
            if verbose:
                print(
                    'SVR fit requires scikit-learn python library. Using exponential instead.'
                )
            if 'bin' in fit_method:
                return fit_CV(mu, cv, fit_method='binExp', x0=x0)
            else:
                return fit_CV(mu, cv, fit_method='Exp', x0=x0)
    elif 'Exp' in fit_method:
        from scipy.optimize import minimize
        # Objective to fit: least absolute deviations in log space
        fun = lambda x, log2_m, log2_cv: sum(
            abs(log2((2.**log2_m)**(-x[0]) + x[1]) - log2_cv))
        # Fit using the Nelder-Mead algorithm
        optimization = minimize(fun,
                                x0,
                                args=(log2_m, log2_cv),
                                method='Nelder-Mead')
        params = optimization.x
        #The fitted function
        fitted_fun = lambda log_mu: log2(
            (2.**log_mu)**(-params[0]) + params[1])
        # Score is the relative position with respect of the fitted curve
        score = log2(cv) - fitted_fun(log2(mu))
        #The coordinates of the fitted curve
        mu_linspace = linspace(min(log2_m), max(log2_m))
        cv_fit = fitted_fun(mu_linspace)
        return score, mu_linspace, cv_fit, params
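A minimal usage sketch with synthetic counts (hypothetical data; the 'Exp' fit assumes scipy is available):

import numpy as np

mu = np.random.gamma(2.0, 5.0, size=500) + 0.1                # fake per-gene means
cv = mu ** -0.5 + 0.2 + np.random.normal(0, 0.02, size=500)   # fake noisy CVs
score, mu_lin, cv_fit, pars = fit_CV(mu, cv, fit_method='Exp')
# genes with score > 0 sit above the fitted noise curve (higher variability)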
Example 13
 def random_shuffle(self, random_seed: Optional[int]) -> List[T]:
     random = np.random.RandomState(random_seed)
     return self._table.take(random.permutation(self.num_rows()))
Example 14
training_op = optimizer.minimize(loss, var_list=train_vars)  # -> layers 1 and 2 are now frozen

# the more data available, the more layers can be unfrozen

# frozen layers won't change -> cache the output of the topmost frozen layer for each training instance
# -> speed boost since training goes through the whole dataset many times
# (-> go through the frozen layers once per training run instead of once per epoch)
# e.g., run the whole training set through the lower layers:
hidden2_outputs = sess.run(hidden2, feed_dict={X:X_train})
# during training build batches of hidden2_outputs instead of batches of training instances:
import numpy as np

n_epochs = 100
n_batches = 500
for epoch in range(n_epochs):
    shuffled_idx = np.random.permutation(len(hidden2_outputs))  # the stdlib random module has no permutation
    hidden2_batches = np.array_split(hidden2_outputs[shuffled_idx], n_batches) 
    y_batches = np.array_split(y_train[shuffled_idx], n_batches)
    for hidden2_batch, y_batch in zip(hidden2_batches, y_batches):
        sess.run(training_op, feed_dict={hidden2:hidden2_batch, y:y_batch})
        
# if no existing model available and not a lot of labeled training data
# -> unsupervised training: train each layer 1 by 1 (starting from low-level) using unsupervised feature detection algorithm
# -> pretraining on auxiliary task (for which you can easily have enough labeled training data)        

# 5 ways to speed up the training
# 1) good initialization of the weights
# 2) good activation function
# 3) Batch Normalization
# 4) reuse of pretrained network
# 5) good optimizer (-> use Adam !)
Example 15
def rchoose(k, n):
    assert k <= n
    return random.permutation(range(n))[:k]
Example 16
def rchoose(k, n):
    "Choose k distinct values from range(n)."
    assert k <= n
    return random.permutation(range(n))[:k]
Example 17
def rchoose(k, n):
    assert k <= n
    return random.permutation(range(n))[:k]
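All three variants assume numpy's random; random.permutation(n) would avoid materializing range(n). Usage:

from numpy import random

print(rchoose(3, 10))  # e.g. [7 2 5]: 3 distinct values from range(10)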
Example 18
'''----------------------------------------------------------------'''

from numpy import random
import numpy as np

arr = np.array([1, 2, 3, 4, 5])

random.shuffle(arr)
print(arr)
'''----------------------------------------------------------------'''

from numpy import random
import numpy as np

arr = np.array([1, 2, 3, 4, 5])
print(random.permutation(arr))
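The difference between the two snippets: shuffle works in place, while permutation returns a shuffled copy and leaves its input untouched:

from numpy import random
import numpy as np

arr = np.array([1, 2, 3, 4, 5])
out = random.permutation(arr)
print(arr)  # original order preserved: [1 2 3 4 5]
print(out)  # a shuffled copy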
'''----------------------------------------------------------------'''

import random
from time import sleep

while True:
    random_num = random.randint(1, 10)
    if random_num == 10:
        print('is equal to 10. The loop breaks')
        break
    else:
        print('is not equal to 10. The loop repeats')
        sleep(1)
'''----------------------------------------------------------------'''