Esempio n. 1
0
 #	The root of the problem may be that the dictionary is biased towards high expression
 #	(because the loss function in SMAF wasn't told to specifically care about lowly expressed genes)
 #	So we might change the loss function to be sensitive to the average loss for each gene
 #	...possibly normalized somehow so that we care the same about each gene individually
 #	This may also help with the poor performance for each gene across tissues
 #	In addition, we might weight Phi by a prior on (inverse) expression
 #	Note that applying this weight without making the dictionary sensitive to low expression did not improve the results
 # Dictionary size: 1.5x the number of columns of xa (truncated to int),
 # capped at 150.
 k = min(int(xa.shape[1] * 1.5), 150)
 # Pair of uniform-random [0, 1) matrices with shapes (rows of xa, k) and
 # (k, columns of xa), handed to smaf through its UW argument.
 # Presumably these are the initial factor matrices -- confirm against smaf.
 UW = (np.random.random((xa.shape[0], k)), np.random.random(
     (k, xa.shape[1])))
 # Factorise xa with SMAF; the two returned values are unpacked as (ua, va).
 # The positional 5 and 0.0005 are smaf's third and fourth parameters; their
 # meaning is defined by smaf's signature, which is not visible here.
 # NOTE(review): under Python 3, `xa.shape[0] / 10` is true division and
 # yields a float -- confirm that smaf accepts a non-integer module_lower.
 ua, va = smaf(xa,
               k,
               5,
               0.0005,
               maxItr=10,
               use_chol=True,
               activity_lower=0.,
               module_lower=xa.shape[0] / 10,
               UW=UW,
               donorm=True,
               mode=1,
               mink=3.)
 # Run the recovery on xa using the learned ua as the known basis (Psi).
 # Ridge regression is disabled; `f` is a string label passed through to the
 # helper (its use is not visible here).
 x2a, phi, y, w, d, psi = recover_system_knownBasis(
     xa,
     measurements,
     sparsity,
     Psi=ua,
     snr=SNR,
     use_ridge=False,
     f="smaf_train_measurement")
 # Score the recovered matrix x2a against the original xa and record the
 # result under the 'SMAF (training)' key.
 Results['SMAF (training)'] = compare_results(xa, x2a)
 # Persist the original training matrix; np.save appends ".npy" to the name.
 np.save("SMAF_gene_actual_train_100", xa)
Esempio n. 2
0
 # For each column of ua: exp(entropy(|column|)).
 # Assuming `entropy` is scipy.stats.entropy (TODO confirm), this is the
 # perplexity of the normalised absolute weights, i.e. an "effective number"
 # of entries contributing to that column.
 module_size_nmf = np.array([np.exp(entropy(abs(x))) for x in ua.T])
 # Same statistic computed for each column of va.
 usage_nmf = np.array([np.exp(entropy(abs(x))) for x in va.T])
 # Keep references to the current factors under *_nmf names. These are
 # aliases, not copies, so in-place changes to ua / va would show through.
 ua_nmf = ua
 w_nmf = va
 # Basis size: the smaller of 1.5x the number of columns of xa (truncated to
 # int) and 4 * ds, then additionally capped at MAX_BASIS.
 k = min(int(xa.shape[1] * 1.5), ds * 4)
 k = min(k, MAX_BASIS)
 # Pair of uniform-random [0, 1) matrices with shapes (rows of xa, k) and
 # (k, columns of xa), handed to smaf through its UW argument in the loop
 # that follows.
 UW = (np.random.random((xa.shape[0], k)), np.random.random(
     (k, xa.shape[1])))
 # Starting value for lda2, which the following loop passes to smaf as its
 # fourth positional argument.
 lda2 = ERROR_THRESH
 while True:
     U, W = smaf(xa,
                 k,
                 10,
                 lda2,
                 maxItr=10,
                 use_chol=True,
                 activity_lower=4.,
                 module_lower=400,
                 UW=UW,
                 donorm=True,
                 mode=1,
                 mink=5)
     nz = np.nonzero(U.sum(0))[0]
     U = U[:, nz]
     W = W[nz]
     xh_smaf = U.dot(W)
     fit_smaf = 1 - np.linalg.norm(xa - xh_smaf)**2 / np.linalg.norm(xa)**2
     if (len(nz) > MIN_BASIS) and (fit_smaf > MIN_FIT):
         break
     elif lda2 < ERROR_THRESH / 16:
         break
     else: