def task_run(task):
    Logger.debug("task begin")
    t_begin = time.time()
    ret = task.run()
    Logger.info('In parallel each task finishes in {0} seconds'.format(
        time.time() - t_begin) + " [@performance]")
    return ret
def gaussianize(self):
    """ Gaussianizes each feature. """
    Logger.debug("Gaussianizing features..")
    self.data = gaussianize_mat(self.data.T).T
    Logger.debug("Done gaussianizing..")
def copy(self):
    """ Returns a copy of this model. """
    Logger.debug("Copying model..")
    cp = SemanticModel(self.data.copy(), list(self.vocab))
    Logger.debug("Done copying model..")
    return cp
def pca_reduce(self, ndims):
    """ Reduces the dimensionality of the vector-space using PCA. """
    Logger.debug("Reducing with PCA to %d dimensions" % ndims)
    U, S, Vh = np.linalg.svd(self.data, full_matrices=False)
    self.data = np.dot(Vh[:ndims].T, np.diag(S[:ndims])).T
    Logger.debug("Done with PCA..")
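# Usage sketch for pca_reduce (the shapes below are hypothetical; assumes
# self.data is a features-by-words matrix, as in the rest of this class):
#
#     vocab = ["w%d" % i for i in range(5000)]
#     sm = SemanticModel(np.random.randn(1000, 5000), vocab)
#     sm.pca_reduce(300)
#     sm.data.shape   # -> (300, 5000): the feature axis shrinks, the word axis is kept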
def save(self, filename):
    """ Saves this semantic model at the given filename. """
    Logger.debug("Saving file: %s" % filename)
    shf = tables.open_file(filename, mode="w", title="SemanticModel")
    # use the PEP8-style PyTables API, consistent with tables.open_file above
    shf.create_array("/", "data", self.data)
    shf.create_array("/", "vocab", self.vocab)
    shf.close()
    Logger.debug("Done saving file..")
def uniformize(self):
    """ Uniformizes each feature. """
    Logger.debug("Uniformizing features..")
    R = np.zeros_like(self.data).astype(np.uint32)
    for ri in range(self.data.shape[0]):
        R[ri] = np.argsort(np.argsort(self.data[ri]))
    self.data = R.astype(np.float64)
    Logger.debug("Done uniformizing...")
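# Rank sketch: the double argsort above maps each value in a feature row to its
# ascending rank, e.g. np.argsort(np.argsort([0.2, 0.9, 0.5])) -> [0, 2, 1], so
# every feature row becomes (roughly) uniformly distributed over 0..nwords-1.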
def parallelize_tasks(tasks):
    # Leave one core free, but never ask for fewer than one worker
    # (os.cpu_count() - 1 can be zero on a single-core machine).
    cpu_count = max(1, min(len(tasks), os.cpu_count() - 1))
    with ThreadPoolExecutor(max_workers=cpu_count) as executor:
        begin = time.time()
        results = executor.map(task_run, tasks)
        results_list = list(results)
        log_info = 'In parallel all the tasks finished in {0} seconds [@performance]'.format(
            time.time() - begin)
        Logger.info(log_info)
    return results_list
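# Usage sketch for parallelize_tasks (DummyTask is hypothetical; any object with
# a .run() method works, since task_run only calls task.run()):
#
#     class DummyTask:
#         def run(self):
#             return 42
#
#     results = parallelize_tasks([DummyTask() for _ in range(4)])
#     # -> [42, 42, 42, 42], computed by a thread pool that leaves one core free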
def zscore(self, axis=0):
    """ Z-scores either each feature (if axis is 0) or each word (if axis is 1).
    If axis is None, nothing will be Z-scored.
    """
    if axis is None:
        Logger.debug("Not Z-scoring..")
        return
    Logger.debug("Z-scoring on axis %d" % axis)
    if axis == 1:
        self.data = zscore(self.data.T).T
    elif axis == 0:
        self.data = zscore(self.data)
def restrict_by_occurrence(self, min_rank=60, max_rank=60000):
    """ Restricts the data to words whose occurrence rank, counted from the
    most frequent word (as measured by the first feature row), falls between
    [min_rank] and [max_rank].
    """
    Logger.debug("Restricting words by occurrence..")
    nwords = self.data.shape[1]
    wordranks = np.argsort(np.argsort(self.data[0, :]))
    goodwords = np.nonzero(np.logical_and((nwords - wordranks) > min_rank,
                                          (nwords - wordranks) < max_rank))[0]
    self.data = self.data[:, goodwords]
    self.vocab = [self.vocab[i] for i in goodwords]
    Logger.debug("Done restricting words..")
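# Usage sketch for restrict_by_occurrence (assumes, as the code above does, that
# the first feature row orders words by how often they occur):
#
#     sm.restrict_by_occurrence(min_rank=60, max_rank=60000)
#     # keeps only the columns whose frequency rank falls strictly between 60
#     # and 60000, and trims sm.vocab to the same set of words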
def clip(self, sds):
    """ Clips feature values more than [sds] standard deviations away from
    the mean to that value. Another method for dealing with outliers.
    """
    Logger.debug("Truncating features to %d SDs.." % sds)
    fsds = self.data.std(1)
    fms = self.data.mean(1)
    newdata = np.zeros(self.data.shape)
    for fi in range(self.data.shape[0]):
        newdata[fi] = np.clip(self.data[fi],
                              fms[fi] - sds * fsds[fi],
                              fms[fi] + sds * fsds[fi])
    self.data = newdata
    Logger.debug("Done truncating..")
def __init__(self, name, model, condition1, condition2, coordinates,
             do_perm=False, num_perm=1000, figures=[]):
    # Given attributes
    # str
    self.name = name
    # SemanticModel
    self.model = model
    # Condition
    self.condition1 = condition1
    # Condition
    self.condition2 = condition2
    # list<Coordinate>
    self.coordinates = [Coordinate(**coord) for coord in coordinates]
    # list<str> paths of figures
    self.figures = figures
    # bool
    self.do_perm = do_perm
    # int
    self.num_perm = num_perm

    # Generated attributes
    # 1d vector
    self.vector = vectorize(self.condition1, self.condition2, model)
    # bool: contrasts against a baseline condition are single-sided
    self.double_sided = 'baseline' not in condition2.names
    # The permuted vectors form a matrix; every row holds the difference between
    # the vectors of two conditions built from a randomly picked word list.
    if self.do_perm:
        if self.double_sided:
            self.permuted_vectors = self.double_side_permuted_vector()
        else:
            self.permuted_vectors = self.baseline_permuted_vector()
        log_info = 'generated {0} randomized vectors for contrast {1}'.format(
            self.num_perm, self.name)
        Logger.debug(log_info)
def pca_reduce_multi(self, ndimlist):
    """ Reduces the dimensionality of the vector-space using PCA for many
    different numbers of dimensions. More efficient than running pca_reduce
    many times.

    Instead of modifying this object, this function returns a list of new
    SemanticModels with the specified numbers of dimensions.
    """
    Logger.debug("Reducing with PCA to fewer dimensions..")
    U, S, Vh = np.linalg.svd(self.data, full_matrices=False)
    newmodels = []
    for nd in ndimlist:
        # the constructor takes (data, vocab), as in copy() and load();
        # placeholders are filled in just below
        newmodel = SemanticModel(None, None)
        newmodel.vocab = list(self.vocab)
        newmodel.data = np.dot(Vh[:nd].T, np.diag(S[:nd])).T
        newmodels.append(newmodel)
    return newmodels
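# Usage sketch for pca_reduce_multi (nwords stands for the current vocabulary
# size; only one SVD is computed and each requested dimensionality reuses its
# leading singular vectors, which is why this beats repeated pca_reduce calls):
#
#     reduced = sm.pca_reduce_multi([100, 300, 600])
#     [m.data.shape for m in reduced]
#     # -> [(100, nwords), (300, nwords), (600, nwords)]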
def __init__(self):
    with open(os.path.join(LOG_DIR, PID_FILE), 'w') as f:
        f.write(str(os.getpid()) + '\n')

    Logger.info("Loading subjects in advance ... [@performance]")
    begin_time = time.time()
    # touch the attribute once so subject data is loaded (and cached) up front
    __ = Subjects.english1000
    Logger.info("Loading subjects finished, time cost: " +
                str(time.time() - begin_time) + " s [@performance]")

    self.sqs_url = Config.sqs_url
    self.region = Config.region_name
    self.access_key = Config.aws_access_key_id
    self.secret_key = Config.aws_secret_access_key
    self.sqs = boto3.client('sqs',
                            region_name=self.region,
                            aws_access_key_id=self.access_key,
                            aws_secret_access_key=self.secret_key)
def project_stims(self, stimwords):
    """ Projects the stimuli given in [stimwords], which should be a list of
    lists of words, into this feature space. Returns the average feature
    vector across all the words in each stimulus.
    """
    Logger.debug("Projecting stimuli..")
    stimlen = len(stimwords)
    ndim = self.data.shape[0]
    pstim = np.zeros((stimlen, ndim))
    vset = set(self.vocab)
    for t in range(stimlen):
        # reset the drop count once per stimulus, not once per word
        dropped = 0
        for w in stimwords[t]:
            if w in vset:
                pstim[t] += self[w]
            else:
                dropped += 1
        pstim[t] /= (len(stimwords[t]) - dropped)
    return pstim
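# Usage sketch for project_stims (assumes word lookup via self[w] as used above;
# words missing from the vocabulary are simply dropped from the average):
#
#     stims = [["the", "quick", "fox"], ["unknownword", "dog"]]
#     vectors = sm.project_stims(stims)
#     # vectors.shape == (2, ndim); the second row is the vector of "dog" alone
#     # if "unknownword" is not in sm.vocab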
def FDR(vector, q, do_correction=False):
    original_shape = vector.shape
    vector = vector.flatten()
    N = vector.shape[0]
    sorted_vector = sorted(vector)
    if do_correction:
        # harmonic-series correction; the sum must start at 1, not 0,
        # to avoid a ZeroDivisionError
        C = np.sum([1.0 / i for i in range(1, N + 1)])
    else:
        C = 1.0
    thresh = 0
    for i in range(N - 1, 0, -1):
        if sorted_vector[i] <= (i * 1.0) / N * q / C:
            thresh = sorted_vector[i]
            break
    thresh_vector = vector <= thresh
    thresh_vector = thresh_vector.reshape(original_shape)
    thresh_vector = thresh_vector * 1.0
    log_info = "FDR threshold is {}, {} voxels rejected".format(
        thresh, thresh_vector.sum())
    Logger.debug(log_info)
    return thresh_vector, thresh
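# Worked example for FDR with hypothetical p-values and q = 0.05: the scan above
# walks from the largest sorted value down and stops at the first index i where
# sorted_vector[i] <= i / N * q (with C = 1).
#
#     mask, thr = FDR(np.array([0.001, 0.008, 0.039, 0.041, 0.6]), 0.05)
#     # i = 1 is the first index (from the top) that passes: 0.008 <= 1/5 * 0.05,
#     # so thr == 0.008 and mask marks the two values at or below it with 1.0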
def start(self):
    while True:
        try:
            message_id = self.probe()
            if message_id is not None:
                Logger.info("Successfully processed message " + message_id)
            else:
                Logger.info("Didn't receive any message")
        except Exception as e:
            Logger.error(e)
            return
def load(cls, filename):
    """ Loads a semantic model from the given filename. """
    Logger.debug("Loading file: %s" % filename)
    shf = tables.open_file(filename)
    Logger.debug(shf)
    newsm = cls(None, None)
    newsm.data = shf.root.data.read()    # shf.getNode("/data").read()
    newsm.vocab = shf.root.vocab.read()  # shf.getNode("/vocab").read()
    shf.close()
    Logger.debug("Done loading file..")
    return newsm
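# Round-trip sketch for save/load (assumes the HDF5 layout written by save()
# above, with /data and /vocab arrays; the filename is hypothetical):
#
#     sm.save("semantic_model.hf5")
#     sm2 = SemanticModel.load("semantic_model.hf5")
#     np.allclose(sm.data, sm2.data)   # -> True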
def __call__(self, inputs):
    # prepare analyses
    analyses = [WebGL()]
    group_analyses = [Mean(), WebGLGroup()]
    # render
    render = Render()

    # parse request
    Logger.info("Parsing request arguments ... [@performance]")
    begin_time = time.time()
    req = Request(**inputs)
    Logger.info("Parsing request arguments finished, time cost: " +
                str(time.time() - begin_time) + "s [@performance]")

    # get the corresponding subjects
    subjects = getattr(Subjects, req.semantic_model)

    # do computation and analyses for each contrast
    output = []
    for contrast in req.contrasts:
        # create tasks
        tasks = [
            Task(req.name, sub, contrast, analyses) for sub in subjects
        ]
        # compute individual subjects in parallel
        ret = parallelize_tasks(tasks)
        # Sequential fallback, kept for reference (with per-task timing):
        #     ret = []
        #     t_begin = time.time()
        #     for t in tasks:
        #         begin = time.time()
        #         ret.append(t.run())
        #         Logger.debug('In sequence each task finishes in {0} seconds'.format(
        #             time.time() - begin))
        #     Logger.debug('In sequence all the tasks finished in {0} seconds'.format(
        #         time.time() - t_begin))

        # collect results
        sub_res, data = zip(*ret)
        sub_res = {
            sub.name: {k: v for i in res for k, v in i.serialize().items()}
            for res, sub in zip(sub_res, subjects)
        }

        # execute group evaluation
        next_data = {'contrast_results': data}
        grp_res = {}
        for ga in group_analyses:
            begin_time = time.time()
            res = ga(req.name, subjects, contrast, **next_data)
            Logger.info(
                "Executing group analysis {0} costs {1} s [@performance]".format(
                    clsname(ga), str(time.time() - begin_time)))
            if isinstance(res, Serializable):
                grp_res.update(res.serialize())
            elif isinstance(res, dict):
                next_data.update(res)

        output.append(render.render(contrast, grp_res, sub_res))

    ret = json.dumps(output)
    Logger.debug(ret)
    update_contrast_result(ret)
    return ret
def __mean__(self, subjects, do_perm, contrast_results):
    # Prepare volumes
    volumes = {}
    if self.mask_pred:
        for s, cr in zip(subjects, contrast_results):
            mask = s.voxels_predicted
            mask = cortex.Volume(mask, s.name, s.transform)
            s.data[mask.data == True] = -1
            if do_perm:
                # FIXME which threshold?
                cr.thresholded_contrast.data[mask.data == True] = -1
            volumes[cr.subject] = s
    if do_perm:
        if not self.mask_pred:
            volumes = {
                con_res.subject: con_res.threshold_05
                for con_res in contrast_results
            }
        else:
            pass
    else:
        volumes = {
            con_res.subject: con_res for con_res in contrast_results
        }
    for v in volumes.values():
        v.data = np.nan_to_num(v.data)

    # Re-compute mask
    if self.recomputed_mask:
        # FIXME should it be s.predicted_mask_mni ?
        # s2 = [s.predicted_mask_MNI for s in subjects]
        s2 = [s.predicted_mask_mni for s in subjects]
        self.nan_mask = np.mean(np.stack(s2), axis=0)
        self.nan_mask = self.nan_mask >= 3 / 8.0
        np.save(MNI_MASK_FILE, self.nan_mask)

    mni_volumes = [
        cortex.mni.transform_to_mni(volumes[s.name], s.func_to_mni).get_data().T
        for s in subjects
    ]

    # Smooth
    if self.smooth is not None:
        Logger.debug("Smoothing with %f mm kernel.." % self.smooth)
        atlasim = nipy.load_image(FSL_DEFAULT_TEMPLATE)
        smoother = nipy.kernel_smooth.LinearFilter(atlasim.coordmap,
                                                   atlasim.shape,
                                                   self.smooth)
        new_mni_volumes = []
        for ml in mni_volumes:
            # Create nipy-style Image from volume
            ml_img = nipy.core.image.Image(ml.T, atlasim.coordmap)
            # Pass it through smoother
            sm_ml_img = smoother.smooth(ml_img)
            # Store the result
            new_mni_volumes.append(sm_ml_img.get_data().T)
        mni_volumes = new_mni_volumes

    # Mean values
    group_mean = np.mean(np.stack(mni_volumes), axis=0)
    group_mean[self.nan_mask == False] = np.nan
    un_nan = np.isnan(group_mean) * self.nan_mask
    group_mean[un_nan] = 0
    max_v_volume = 1 if do_perm or self.do_1pct else 2
    if self.do_1pct:
        th = np.percentile(group_mean[group_mean != 0], 90)
        group_mean = group_mean >= th

    mean_volume = cortex.Volume(group_mean, 'MNI', 'atlas',
                                vmin=-max_v_volume, vmax=max_v_volume)
    sub_volumes = [
        cortex.Volume(vol, 'MNI', 'atlas',
                      vmin=-np.abs(vol).max(), vmax=np.abs(vol).max())
        for vol in mni_volumes
    ]
    for sub, vol in zip(subjects, sub_volumes):
        vol.data[sub.predicted_mask_mni == False] = np.nan
    return mean_volume, sub_volumes