def processItem(self, item):
    # join, urlopen and URLError are assumed to be imported from os.path
    # and urllib2 elsewhere in this module.
    hash = item.hash()
    from cache import cacheFolder, cached
    cacheFile = join(cacheFolder, hash)
    try:
        data = urlopen(item.url).read()
        with open(cacheFile, "wb") as f:
            f.write(data)
        cached(item)
    except URLError:
        item.failed = True
        item.save()
def __init__(self, xx):
    '''
    Inspired by PCA in matplotlib.mlab

    Compute the principal components of a dataset and store the mean,
    sigma, and SVD of sigma for the data. Use toPC and fromPC to project
    the data onto a reduced set of dimensions and back. This version
    takes the SVD of the covariance matrix.

    Inputs:

      *xx*: a numobservations x numdims array

    Attrs:

      *nn*, *mm*: the dimensions of xx
      *mu*: a numdims array of means of xx
      *sigma*: the covariance matrix
      *var*: the average amount of variance of each of the principal components
      *std*: sqrt of var
      *fracVar*: the fractional amount of variance from each principal component
      *fracStd*: sqrt of fracVar
    '''
    self.nn, self.mm = xx.shape
    if self.nn < self.mm:
        raise RuntimeError('we assume data in xx is organized with numrows > numcols')
    self.mu = xx.mean(axis=0)
    centeredXX = self.center(xx)
    #self.sigma = dot(centeredXX.T, centeredXX) / self.nn
    self.sigma = cached(dot, centeredXX.T, centeredXX) / self.nn
    # Columns of UU are the eigenvectors of self.sigma, i.e. the
    # principal components. UU and VV are transposes of each other;
    # we don't use VV. ss is the diagonal of the true S matrix.
    #self.UU, self.ss, self.VV = linalg.svd(self.sigma, full_matrices=False)
    self.UU, self.ss, self.VV = cached(linalg.svd, self.sigma, full_matrices=False)
    self.var = self.ss / float(self.nn)
    self.std = sqrt(self.var)
    self.fracVar = self.var / self.var.sum()
    self.fracStd = self.std / self.std.sum()
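# A minimal usage sketch of the class above. The class name (PCA) and the
# toPC/fromPC signatures are assumptions based on the docstring; those
# methods are not shown in this snippet.

import numpy as np

xx = np.random.randn(500, 10)   # numobservations x numdims
pca = PCA(xx)

print(pca.fracVar)              # fraction of total variance per component

yy = pca.toPC(xx)               # project onto the principal components
xxBack = pca.fromPC(yy)         # map back into the original space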
def diff(self):
    # Done this odd way to make caching work.
    def differ(a, b):
        return Diff(a, b)
    prev = (Revision.q.filter(Revision.svn_id < self.svn_id)
            .order_by(Revision.svn_id.desc()).first())
    return cached(differ)(self, prev)
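# The cache module itself is not shown in these snippets. A minimal sketch of
# a memoizing cached() consistent with the cached(differ)(self, prev) call
# above; the keying scheme is an assumption, and keying on the function's
# name would also explain why differ is defined as a named local function:

import functools

def cached(fn):
    store = {}

    @functools.wraps(fn)
    def wrapper(*args):
        # Key on the function name plus its arguments; assumes the
        # arguments are hashable. The real module may key differently
        # (e.g. persist results to disk).
        key = (fn.__name__,) + tuple(args)
        if key not in store:
            store[key] = fn(*args)
        return store[key]
    return wrapper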
# In[2]:

sns.set(context='talk')


# In[3]:

model_name = 'model5'
by = 'SubSet'
sample_n = 1000


# ## load files for all cell types

# In[4]:

df = cache.cached(data.prep_annotated_data)


# In[5]:

assert all(pd.notnull(df['log1p_tpm_rescaled']))


# In[6]:

print(df.columns)
#apply(lambda x: x.startswith('C'))


# ## sample genes for analysis

# In[7]:
except ImportError:
    from StringIO import StringIO

import simplejson

from lanshark.config import config

import logging
logger = logging.getLogger('lanshark')

from lanshark import icons
from lanshark import network
from lanshark import sendfile

from cache import cached
socket.getaddrinfo = cached(config.CACHE_TIMEOUT, stats=config.debug)(
    socket.getaddrinfo)

iconpath = os.path.join(config.DATA_PATH, "icons", "32x32")
iconfactory = icons.URLIconFactory(iconpath, "/__data__/icons/128x128/", ".png")

hidden_files = [re.compile(pattern) for pattern in config.HIDDEN_FILES]

def hidden(filename):
    return any(pattern.match(filename) for pattern in hidden_files)

class FileIndex(threading.Thread):
    """
    The file index offers fast searching over a periodically updated
    file index
    """
    def __init__(self, path):
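# In the snippet above, socket.getaddrinfo is monkey-patched with a
# time-limited cache so repeated DNS lookups within CACHE_TIMEOUT seconds are
# served from memory. The real cache module is not shown; a rough sketch of a
# TTL decorator factory matching the cached(timeout, stats=...)(fn) shape
# (stats handling omitted) might look like this:

import time

def cached(timeout, stats=False):
    def decorator(fn):
        store = {}  # args -> (expiry timestamp, result)

        def wrapper(*args):
            now = time.time()
            hit = store.get(args)
            if hit is not None and hit[0] > now:
                return hit[1]  # still fresh: reuse the cached result
            result = fn(*args)
            store[args] = (now + timeout, result)
            return result
        return wrapper
    return decorator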
# In[1]:

import data
import models
import cache
import seaborn as sns


# In[2]:

model_name = 'model5.2'
by = 'cell_type'
sample_n = 500


# In[3]:

sample_df = cache.cached(models.prep_sample_df, sample_n=sample_n)
(training_df, test_df) = models.split_sample_df(sample_df=sample_df,
                                                test_sample_n=1)


# In[4]:

model_file = models.get_model_file(model_name=model_name)
#print(cache._read_file(model_file))


# In[5]:

stan_data = models.prep_stan_data(sample_df=training_df, test_df=test_df, by=by)


# In[ ]:
# In[3]:

model_name = 'model5.3'
by = 'SubSet'
sample_n = 100


# ## get data, as we did in earlier examples

# This will help in case we want to compare estimates for particular genes or samples

# In[4]:

sample_df = cache.cached(models.prep_sample_df, sample_n=sample_n)


# In[5]:

stan_data1 = models.prep_stan_data(sample_df, by=by, nu=1)
stan_data2 = models.prep_stan_data(sample_df, by=by, nu=2)
stan_data3 = models.prep_stan_data(sample_df, by=by, nu=3)
stan_data4 = models.prep_stan_data(sample_df, by=by, nu=4)
stan_data5 = models.prep_stan_data(sample_df, by=by, nu=5)
stan_data6 = models.prep_stan_data(sample_df, by=by, nu=6)


# In[6]:

model = models.get_model_file(model_name=model_name)
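# The six nearly identical calls in In[5] sweep nu from 1 through 6. If the
# cells downstream can index a dict instead of six separate names, the same
# sweep can be written as a comprehension (a sketch; later cells would need
# updating, e.g. stan_data[3] in place of stan_data3):

stan_data = {nu: models.prep_stan_data(sample_df, by=by, nu=nu)
             for nu in range(1, 7)}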