def dispersion_ranking_NN(self, dist, num_norm_avg=50):
    """Computes the spatial dispersion factors for each gene.

    Given a cell distance matrix, this function calculates the
    k-nearest-neighbor adjacency matrix (using 'self.k'), and performs
    k-nearest-neighbor averaging of the expression data 'self.D'. From the
    averaged expression data, the Fano factor (variance/mean) for each
    gene is computed. The factors are capped at the mean of the top
    'num_norm_avg' values, square rooted and then min-max normalized to
    generate the gene weights, from which gene rankings are calculated.

    Genes whose averaged expression has zero mean are assigned a
    dispersion of 0 instead of the NaN that 0/0 would produce, so that
    they cannot corrupt the normalization factor or the weights.

    As a side effect, the averaged expression data is stored in
    'self.D_avg'.

    Parameters
    ----------
    dist - ndarray, float
        Square cell-to-cell distance matrix.

    num_norm_avg - int, optional, default 50
        The top 'num_norm_avg' dispersions are averaged to determine the
        normalization factor when calculating the weights.

    Returns:
    -------
    indices - ndarray, int
        The indices corresponding to the gene weights sorted in decreasing
        order.

    weights - ndarray, float
        The vector of gene weights.

    nnm - ndarray, int
        The square k-nearest-neighbor directed adjacency matrix.

    D_avg - ndarray, float
        The k-nearest-neighbor-averaged expression data.
    """
    nnm = ut.dist_to_nn(dist, self.k)

    # Average each cell's expression over its neighbors: sum the neighbors'
    # rows, then divide by the number of neighbors of that cell.
    D_avg = nnm.dot(self.D) / np.sum(nnm, axis=1).reshape(
        self.D.shape[0], 1)

    # Fano factor (variance / mean) per gene. Genes with zero mean are left
    # at 0: dividing would give NaN, and because np.sort places NaN last,
    # a single NaN would end up in the top-'num_norm_avg' slice below and
    # turn the normalization factor (and hence every weight) into NaN.
    mu = D_avg.mean(0)
    var = D_avg.var(0)
    expressed = mu != 0
    dispersions = np.zeros_like(var)
    dispersions[expressed] = var[expressed] / mu[expressed]

    # Cap the dispersions at the mean of the largest 'num_norm_avg' values
    # so that a few extreme genes do not dominate the normalized weights.
    ma = np.sort(dispersions)[-num_norm_avg:].mean()
    dispersions[dispersions >= ma] = ma

    weights = ut.normalizer(dispersions**0.5)
    indices = np.argsort(-weights)  # decreasing weight order

    self.D_avg = D_avg
    return indices, weights, nnm, D_avg
temp_data = arr_test[i[0]:i[1]] #generate an array the size of the current sub array, to be added. noise_array = np.array([[temp_data[k, 0]] + [temp_data[k, 1]] + [ temp_data[k, j] + temp_data[k, j] * np.random.uniform(-noise, noise) for j in xrange(2, 6) ] for k in xrange(len(temp_data))]) enh_array = np.concatenate((enh_array, noise_array), axis=0) #Data normalization #print noise_array # a custom built function normalizes along the column. The columns are then put back together #TIME REMOVED #now we have in order #Illumination DCW nitrate Lutein n-Flowrate enh_array = np.column_stack((normalizer(standardizer(enh_array[1:, 0])), normalizer(standardizer(enh_array[1:, 2])), normalizer(standardizer(enh_array[1:, 3])), normalizer(standardizer(enh_array[1:, 4])), normalizer(standardizer(enh_array[1:, 5])))) #this is meant to r_lengths = [[i, i + 12] for i in xrange(0, 12 * 100 * 5, 12)] #print r_lengths[-1] #print len(enh_array) #dataset without time #print enh_array for j in r_lengths: temp_data = enh_array[j[0]:j[1]] for k in xrange(len(temp_data) - 1):
#generate an array the size of the current sub array, to be added. noise_array = np.array([[temp_data[k, 0]] + [temp_data[k, 1]] + [ temp_data[k, j] + temp_data[k, j] * np.random.uniform(-noise, noise) for j in xrange(2, 6) ] for k in xrange(len(temp_data))]) enh_array = np.concatenate((enh_array, noise_array), axis=0) #Data normalization #print noise_array # a custom built function normalizes along the column. The columns are then put back together #TIME REMOVED #now we have in order #Illumination DCW nitrate Lutein n-Flowrate enh_array = np.column_stack( (normalizer(enh_array[1:, 0]), normalizer(enh_array[1:, 2]), normalizer(enh_array[1:, 3]), normalizer(enh_array[1:, 4]), normalizer(enh_array[1:, 5]))) #build and populate datasets ds2 = SupervisedDataSet(5, 3) #this is meant to r_lengths = [[i, i + 12] for i in xrange(0, 12 * 100 * 7, 12)] #print r_lengths[-1] #print len(enh_array) #dataset without time #print enh_array for j in r_lengths: temp_data = enh_array[j[0]:j[1]] for k in xrange(len(temp_data) - 1):
usecols=range(5), columns=lista_col) exp_1.col = lista_col1 exp_2.col = lista_col1 flow1 = np.array([(4 * [0] + 8 * [127.5]) * 3]) flow2 = np.array([(4 * [0] + 8 * [25.5]) * 4]) arr1 = np.concatenate((exp_1.values[1:], flow1.T), axis=1) arr2 = np.concatenate((exp_2.values[1:], flow2.T), axis=1) arr_test = np.concatenate( (arr1[0:12], arr1[24:36], arr2[0:12], arr2[24:36], arr2[36:48]), axis=0) arr_val = np.concatenate((arr1[12:24], arr2[12:24]), axis=0) arr_temp = np.concatenate((arr_test, arr_val), axis=0) #columns are 'Illumination','Time', 'DCW', 'nitrate', 'lutein', 'nitrogen flowrate' arr_tempA = np.column_stack((normalizer(standardizer(arr_temp[:, 0])), normalizer(standardizer(arr_temp[:, 1])), normalizer(standardizer(arr_temp[:, 2])), normalizer(standardizer(arr_temp[:, 3])), normalizer(standardizer(arr_temp[:, 4])), normalizer(standardizer(arr_temp[:, 5])))) arr_testT, arr_valT = arr_tempA[:len(arr_test)], arr_tempA[len(arr_test):] #save data so that it can be reversed later arr_temp_means = np.mean(arr_temp, axis=0, dtype=np.float64) #print exp_1.col #print arr_temp print arr_temp_means arr_temp_stdD = np.std(arr_temp, axis=0, dtype=np.float64) print arr_temp_stdD
# Leftover note from the time-step setup: different deltaT, normalized.
# Should be 12, 24, 36, 72.

# Open the previously saved networks.
network_2 = NetworkReader.readFrom(net_fold + 'network_Type2H1NewSTD.xml')
network_4 = NetworkReader.readFrom(net_fold + 'network_Type2H2NewSTD.xml')

# ---------------- Test on cross validation set 3 ----------------
# Transform the cross-validation set column by column with the custom
# helpers: every one of the six columns goes through `standardizer`, and
# column 1 is additionally passed through `normalizer`.
# NOTE(review): column 1 is presumably 'Time' (it is dropped right below);
# confirm against lista_col2.
arr_valT2 = np.column_stack(tuple(
    normalizer(standardizer(arr_val[:, idx])) if idx == 1
    else standardizer(arr_val[:, idx])
    for idx in xrange(6)))

# beta: the transformed validation data, without the 'Time' column.
beta = pd.DataFrame(arr_valT2, columns=lista_col2).drop('Time', axis=1)

# theta: the pre-treatment (untransformed) validation data, same columns.
theta = pd.DataFrame(arr_val, columns=lista_col2).drop('Time', axis=1)

# Plain arrays of both frames, for ease of use further down.
beta_Val, theta_Val = beta.values, theta.values

# Predict from the experimental point: the result list starts with the
# first transformed validation row.
sim_net2 = [beta_Val[0]]
#Open networks network_2=NetworkReader.readFrom(net_fold+ 'network_Type2H1NewSTD_LessNoise2.xml') network_4=NetworkReader.readFrom(net_fold+ 'network_Type2H2NewSTD_LessNoise2.xml') #normalize the cross validation set #normalize the other data with custom function. Remove cross validation and rearrange #print pd.DataFrame(array_data,columns=lista_col1) #------------------------------------------Test on cross validation set 3-------------------------------------------------------------------- arr_valT2=np.column_stack((standardizer(arr_val[:,0]),normalizer(standardizer(arr_val[:,1])),standardizer(arr_val[:,2]),standardizer(arr_val[:,3]),standardizer(arr_val[:,4]),standardizer(arr_val[:,5]))) beta=pd.DataFrame(arr_valT2,columns=lista_col2) beta.drop('Time',axis=1, inplace=True) #theta is the pre-treatment dataframe theta=pd.DataFrame(arr_val,columns=lista_col2) theta.drop('Time',axis=1, inplace=True) #for each network do the round #do the k #this takes out the cross validation set for ease of use. beta_Val=beta.values theta_Val=theta.values sim_net2=[beta_Val[0]] #Predict from Experimental point #simulation for Network 2, 1 Hidden. for j in xrange(11):
sheetname='Sheet1', header=None, usecols=range(5), columns=lista_col) mb2.col = lista_col1 array_data = mb2.values[1:] #keep only the numbers and turn into numpy array length_data = len(array_data) #the four sets of experiments are given separately. These are their start and end points set_lengths = ((0, 13), (13, 26), (26, 39), (39, 52)) #third set is separated #an enchanced array that will contain the noisy data cross_val = array_data[set_lengths[2][0]:set_lengths[2][1]] times = normalizer(cross_val[:, 1]) #different deltaT.Normalized. Should be 12,24,36,72 #print cross_val deltaT = np.append(times[1:4], [times[6]]) #sys.exit() intrv = [1, 2, 3, 6] #Open networks network_1 = NetworkReader.readFrom(net_fold + 'network_Type1H1.xml') network_2 = NetworkReader.readFrom(net_fold + 'network_Type2H1.xml') network_3 = NetworkReader.readFrom(net_fold + 'network_Type1H2.xml') network_4 = NetworkReader.readFrom(net_fold + 'network_Type2H2.xml') #normalize the cross validation set
noise_array=np.array([[temp_data[k,0]]+[temp_data[k,j]+temp_data[k,j]*np.random.uniform(-noise,noise) for j in xrange(1,5)] for k in xrange(len(temp_data))]) #print noise_array #print enh_array enh_array=np.concatenate((enh_array,noise_array),axis=0) #print enh_array #Data normalization #print enh_array[:3] #print enh_array[1:3] #print 'its length',enh_array.shape # a custom built function normalizes along the column. The columns are then put back together enh_array=np.column_stack((normalizer(enh_array[1:,0]),normalizer(enh_array[1:,1]),normalizer(enh_array[1:,2]),normalizer(enh_array[1:,3]),normalizer(enh_array[1:,4]))) #print enh_array #print 'its length once normalized',enh_array.shape #test_arr=pd.DataFrame(enh_array) #test_arr.to_csv('test_check.csv') #sys.exit() #check later this is NOT DONE #build and populate datasets ds1 = SupervisedDataSet(5, 3) ds2 = SupervisedDataSet(4, 3) r_lengths=[[i,i+13] for i in xrange(0,13*100*3,13)] #print r_lengths for j in r_lengths:
columns=lista_col) exp_1.col = lista_col1 exp_2.col = lista_col1 flow1 = np.array([(4 * [0] + 8 * [127.5]) * 3]) flow2 = np.array([(4 * [0] + 8 * [25.5]) * 4]) arr1 = np.concatenate((exp_1.values[1:], flow1.T), axis=1) arr2 = np.concatenate((exp_2.values[1:], flow2.T), axis=1) arr_test = np.concatenate( (arr1[0:12], arr1[24:36], arr2[0:12], arr2[24:36], arr2[36:48]), axis=0) arr_val = np.concatenate((arr1[12:24], arr2[12:24]), axis=0) arr_temp = np.concatenate((arr_test, arr_val), axis=0) arr_tempA = np.column_stack( (normalizer(arr_temp[:, 0]), normalizer(arr_temp[:, 1]), normalizer(arr_temp[:, 2]), normalizer(arr_temp[:, 3]), normalizer(arr_temp[:, 4]), normalizer(arr_temp[:, 5]))) arr_testT, arr_valT = arr_tempA[:len(arr_test)], arr_tempA[len(arr_test):] #the four sets of experiments are given separately. These are their start and end points set_lengths = ((0, 13), (13, 26), (26, 39), (39, 52)) #third set is separated #an enchanced array that will contain the noisy data #different deltaT.Normalized. Should be 12,24,36,72 #print cross_val #Open networks
#generate an array the size of the current sub array, to be added. noise_array = np.array([[temp_data[k, 0]] + [temp_data[k, 1]] + [ temp_data[k, j] + temp_data[k, j] * np.random.uniform(-noise, noise) for j in xrange(2, 6) ] for k in xrange(len(temp_data))]) enh_array = np.concatenate((enh_array, noise_array), axis=0) #Data normalization #print noise_array # a custom built function normalizes along the column. The columns are then put back together #TIME REMOVED #now we have in order #Illumination DCW nitrate Lutein n-Flowrate enh_array = np.column_stack( (normalizer(enh_array[1:, 0]), normalizer(enh_array[1:, 2]), normalizer(enh_array[1:, 3]), normalizer(enh_array[1:, 4]), normalizer(enh_array[1:, 5]))) normalized_time = normalizer(exp_1.values[1:13, 1]) print normalized_time time_int = normalized_time[1:4] #build and populate datasets ds2 = SupervisedDataSet(6, 3) #this is meant to r_lengths = [[i, i + 12] for i in xrange(0, 12 * 100 * 5, 12)] #print r_lengths[-1] #print len(enh_array) #dataset without time #print enh_array
# ----------
# Build the datasets
# ----------
length_data = len(array_data)
ds1 = SupervisedDataSet(5, 3)

# The four sets are given separately; these are the (start, end) row
# bounds of each set inside array_data.
set_lengths = ((0, 13), (13, 26), (26, 39), (39, 52))

# Data normalization: the custom `normalizer` works along a single column,
# so each of the five columns is normalized on its own and the results are
# stacked back together.
array_data = np.column_stack(tuple(
    normalizer(array_data[:, idx]) for idx in xrange(5)))

# For every set, pair each row k with every later row l of the same set.
# Input  (5 values): column 0 at k, the column-1 difference between l and k,
#                    and columns 2-4 at k.
# Target (3 values): the change in columns 2-4 between k and l.
# Per the original author's note: column 0 is the illumination factor,
# column 1 the time and columns 2-4 the chemical concentrations, so one of
# the inputs is the gap between the current time and the time of the step
# wanted as output.
for j in set_lengths:
    temp_data = array_data[j[0]:j[1]]
    for k in xrange(len(temp_data) - 1):
        for l in xrange(k + 1, len(temp_data)):
            ds1.addSample(
                (temp_data[k, 0], temp_data[l, 1] - temp_data[k, 1])
                + tuple(temp_data[k, 2:5]),
                tuple(temp_data[l, 2:5] - temp_data[k, 2:5]))
columns=lista_col) exp_1.col = lista_col1 exp_2.col = lista_col1 flow1 = np.array([(4 * [0] + 8 * [127.5]) * 3]) flow2 = np.array([(4 * [0] + 8 * [25.5]) * 4]) arr1 = np.concatenate((exp_1.values[1:], flow1.T), axis=1) arr2 = np.concatenate((exp_2.values[1:], flow2.T), axis=1) arr_test = np.concatenate( (arr1[0:12], arr1[24:36], arr2[0:12], arr2[24:36], arr2[36:48]), axis=0) arr_val = np.concatenate((arr1[12:24], arr2[12:24]), axis=0) arr_temp = np.concatenate((arr_test, arr_val), axis=0) arr_temp = np.column_stack( (normalizer(arr_temp[:, 0]), normalizer(arr_temp[:, 1]), normalizer(arr_temp[:, 2]), normalizer(arr_temp[:, 3]), normalizer(arr_temp[:, 4]), normalizer(arr_temp[:, 5]))) arr_testT, arr_valT = arr_temp[:len(arr_test)], arr_temp[len(arr_test):] #the four sets of experiments are given separately. These are their start and end points set_lengths = ((0, 13), (13, 26), (26, 39), (39, 52)) #third set is separated #an enchanced array that will contain the noisy data #different deltaT.Normalized. Should be 12,24,36,72 #print cross_val #Open networks