Example #1
def task_run(task):
    Logger.debug("task begin")
    t_begin = time.time()
    ret = task.run()
    Logger.info('In parallel each task finishes in {0} seconds'.format(
        time.time() - t_begin) + " [@performance]")
    return ret
Example #2
    def gaussianize(self):
        """
        Gaussianizes each feature.
        """
        Logger.debug("Gaussianizing features..")
        self.data = gaussianize_mat(self.data.T).T
        Logger.debug("Done gaussianizing..")
Example #3
    def copy(self):
        """
        Returns a copy of this model.
        """
        Logger.debug("Copying model..")
        cp = SemanticModel(self.data.copy(), list(self.vocab))
        Logger.debug("Done copying model..")
        return cp
Example #4
    def pca_reduce(self, ndims):
        """
        Reduces the dimensionality of the vector-space using PCA.
        """
        Logger.debug("Reducing with PCA to %d dimensions" % ndims)
        U, S, Vh = np.linalg.svd(self.data, full_matrices=False)
        self.data = np.dot(Vh[:ndims].T, np.diag(S[:ndims])).T
        Logger.debug("Done with PCA..")
Example #5
    def save(self, filename):
        """
        Saves this semantic model at the given filename.
        """
        Logger.debug("Saving file: %s" % filename)
        shf = tables.open_file(filename, mode="w", title="SemanticModel")
        shf.create_array("/", "data", self.data)
        shf.create_array("/", "vocab", self.vocab)
        shf.close()
        Logger.debug("Done saving file..")
Example #6
    def uniformize(self):
        """
        Uniformizes each feature.
        """
        Logger.debug("Uniformizing features..")
        R = np.zeros_like(self.data).astype(np.uint32)
        for ri in range(self.data.shape[0]):
            R[ri] = np.argsort(np.argsort(self.data[ri]))

        self.data = R.astype(np.float64)
        Logger.debug("Done uniformizing...")
Example #7
def parallelize_tasks(tasks):
    cpu_count = max(1, min(len(tasks), os.cpu_count() - 1))
    with ThreadPoolExecutor(max_workers=cpu_count) as executor:
        begin = time.time()
        results = executor.map(task_run, tasks)
        results_list = list(results)
        log_info = 'In parallel all the tasks finished in {0} seconds [@performance]'.format(
            time.time() - begin)
        Logger.info(log_info)

        return results_list
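A hedged usage sketch of the same pattern: anything exposing a run() method can be submitted. DummyTask is hypothetical, and a plain lambda replaces task_run so the sketch runs without the module's Logger.

import os
import time
from concurrent.futures import ThreadPoolExecutor

class DummyTask:
    """Hypothetical task type; only the run() method matters."""

    def __init__(self, n):
        self.n = n

    def run(self):
        time.sleep(0.01)
        return self.n * self.n

tasks = [DummyTask(i) for i in range(4)]
cpu_count = max(1, min(len(tasks), (os.cpu_count() or 2) - 1))
with ThreadPoolExecutor(max_workers=cpu_count) as executor:
    results = list(executor.map(lambda t: t.run(), tasks))
print(results)  # [0, 1, 4, 9]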
Example #8
    def zscore(self, axis=0):
        """
        Z-scores either each feature (if axis is 0) or each word (if axis is 1).
        If axis is None nothing will be Z-scored.
        """
        if axis is None:
            Logger.debug("Not Z-scoring..")
            return

        Logger.debug("Z-scoring on axis %d" % axis)
        if axis == 1:
            self.data = zscore(self.data.T).T
        elif axis == 0:
            self.data = zscore(self.data)
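A minimal check of the axis convention, assuming the zscore used above is scipy.stats.zscore (the import is not shown in the excerpt, so that is an assumption):

import numpy as np
from scipy.stats import zscore  # assumed source of the zscore used above

x = np.array([[1.0, 2.0, 3.0],
              [2.0, 4.0, 6.0]])
print(zscore(x))      # default axis=0: every column ends up with mean 0 and unit variance
print(zscore(x.T).T)  # transposing twice normalizes each row instead, as in the axis == 1 branch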
Example #9
    def restrict_by_occurrence(self, min_rank=60, max_rank=60000):
        """
        Restricts the data to words whose occurrence rank is higher than
        [min_rank] and lower than [max_rank].
        """
        Logger.debug("Restricting words by occurrence..")
        nwords = self.data.shape[1]
        wordranks = np.argsort(np.argsort(self.data[0, :]))
        goodwords = np.nonzero(np.logical_and((nwords - wordranks) > min_rank,
                                              (nwords - wordranks) < max_rank))[0]

        self.data = self.data[:, goodwords]
        self.vocab = [self.vocab[i] for i in goodwords]
        Logger.debug("Done restricting words..")
Example #10
    def clip(self, sds):
        """
        Clips feature values that are more than [sds] standard deviations away from
        the mean back to that bound. Another method for dealing with outliers.
        """
        Logger.debug("Truncating features to %d SDs.." % sds)
        fsds = self.data.std(1)
        fms = self.data.mean(1)
        newdata = np.zeros(self.data.shape)
        for fi in range(self.data.shape[0]):
            newdata[fi] = np.clip(self.data[fi],
                                  fms[fi] - sds * fsds[fi],
                                  fms[fi] + sds * fsds[fi])

        self.data = newdata
        Logger.debug("Done truncating..")
Example #11
    def __init__(self,
                 name,
                 model,
                 condition1,
                 condition2,
                 coordinates,
                 do_perm=False,
                 num_perm=1000,
                 figures=None):

        # Given attributes

        # str
        self.name = name
        # SemanticModel
        self.model = model
        # Condition
        self.condition1 = condition1
        # Condition
        self.condition2 = condition2
        # list<Coordinate>
        self.coordinates = [Coordinate(**coord) for coord in coordinates]
        # list<str> paths of figures
        self.figures = figures if figures is not None else []
        # bool
        self.do_perm = do_perm
        # int
        self.num_perm = num_perm

        # Generated attributes

        # 1d vector
        self.vector = vectorize(self.condition1, self.condition2, model)
        # bool
        self.double_sided = 'baseline' not in condition2.names

        # The permuted vectors form a matrix; each row is the difference between the
        # vectors of two conditions generated from a randomly picked word list.
        if self.do_perm:
            if self.double_sided:
                self.permuted_vectors = self.double_side_permuted_vector()
            else:
                self.permuted_vectors = self.baseline_permuted_vector()
            log_info = 'generated {0} randomized vectors for contrast {1}'.format(
                self.num_perm, self.name)
            Logger.debug(log_info)
Example #12
    def pca_reduce_multi(self, ndimlist):
        """
        Reduces the dimensionality of the vector-space using PCA for many
        different numbers of dimensions.  More efficient than running
        pca_reduce many times.

        Instead of modifying this object, this function returns a list of new
        SemanticModels with the specified numbers of dimensions.
        """
        Logger.debug("Reducing with PCA to fewer dimensions..")
        U, S, Vh = np.linalg.svd(self.data, full_matrices=False)
        newmodels = []
        for nd in ndimlist:
            newmodel = SemanticModel(None, None)
            newmodel.vocab = list(self.vocab)
            newmodel.data = np.dot(Vh[:nd].T, np.diag(S[:nd])).T
            newmodels.append(newmodel)
        return newmodels
Example #13
    def __init__(self):

        with open(os.path.join(LOG_DIR, PID_FILE), 'w') as f:
            f.write(str(os.getpid()) + '\n')

        Logger.info("Loading subjects in advance ... [@performance]")
        begin_time = time.time()
        __ = Subjects.english1000
        Logger.info("Loading subjects finished, time cost: " +
                    str(time.time() - begin_time) + " s [@performance]")

        self.sqs_url = Config.sqs_url
        self.region = Config.region_name
        self.access_key = Config.aws_access_key_id
        self.secret_key = Config.aws_secret_access_key
        self.sqs = boto3.client('sqs',
                                region_name=self.region,
                                aws_access_key_id=self.access_key,
                                aws_secret_access_key=self.secret_key)
Example #14
    def project_stims(self, stimwords):
        """
        Projects the stimuli given in [stimwords], which should be a list of lists
        of words, into this feature space. Returns the average feature vector across
        all the words in each stimulus.
        """
        Logger.debug("Projecting stimuli..")
        stimlen = len(stimwords)
        ndim = self.data.shape[0]
        pstim = np.zeros((stimlen, ndim))
        vset = set(self.vocab)
        for t in range(stimlen):
            dropped = 0
            for w in stimwords[t]:
                if w in vset:
                    pstim[t] += self[w]
                else:
                    dropped += 1

            pstim[t] /= (len(stimwords[t]) - dropped)

        return pstim
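A standalone sketch of the same averaging idea with a tiny hypothetical vocabulary; the real method indexes the model with self[w], here a plain dict plays that role:

import numpy as np

vectors = {"dog": np.array([1.0, 0.0]), "cat": np.array([0.0, 1.0])}  # hypothetical 2-d word vectors
stimwords = [["dog", "cat", "unknownword"]]

pstim = np.zeros((len(stimwords), 2))
for t, words in enumerate(stimwords):
    dropped = 0
    for w in words:
        if w in vectors:
            pstim[t] += vectors[w]
        else:
            dropped += 1
    pstim[t] /= (len(words) - dropped)

print(pstim)  # [[0.5 0.5]]: mean of the in-vocabulary word vectors only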
Example #15
def FDR(vector, q, do_correction=False):
    original_shape = vector.shape
    vector = vector.flatten()
    N = vector.shape[0]
    sorted_vector = sorted(vector)
    if do_correction:
        C = np.sum([1.0 / i for i in range(1, N + 1)])
    else:
        C = 1.0
    thresh = 0
    for i in range(N - 1, 0, -1):
        if sorted_vector[i] <= (i * 1.0) / N * q / C:
            thresh = sorted_vector[i]
            break
    thresh_vector = vector <= thresh
    thresh_vector = thresh_vector.reshape(original_shape)
    thresh_vector = thresh_vector * 1.0

    log_info = "FDR threshold is : {}, {} voxels rejected".format(
        thresh, thresh_vector.sum())
    Logger.debug(log_info)
    return thresh_vector, thresh
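A quick sketch of the thresholding loop on synthetic p-values, mirroring FDR above with do_correction left off (C = 1); the uniform noise and q = 0.05 are assumptions for illustration:

import numpy as np

np.random.seed(0)
pvals = np.concatenate([np.random.uniform(size=95), np.full(5, 1e-4)])  # 5 planted small p-values
q = 0.05

N = pvals.size
sorted_p = np.sort(pvals)
thresh = 0.0
for i in range(N - 1, 0, -1):
    if sorted_p[i] <= (i * 1.0) / N * q:  # same criterion as the loop in FDR
        thresh = sorted_p[i]
        break

print(thresh, (pvals <= thresh).sum())  # at least the 5 planted p-values fall below the threshold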
Example #16
    def start(self):
        while True:
            try:
                message_id = self.probe()
                if message_id is not None:
                    Logger.info("Successfully processed message " + message_id)
                else:
                    Logger.info("Didn't receive any message")
            except Exception as e:
                Logger.error(e)
                return
Example #17
    def load(cls, filename):
        """
        Loads a semantic model from the given filename.
        """
        Logger.debug("Loading file: %s" % filename)
        shf = tables.open_file(filename)
        Logger.debug(shf)
        newsm = cls(None, None)
        newsm.data = shf.root.data.read()  # shf.getNode("/data").read()
        newsm.vocab = shf.root.vocab.read()  # shf.getNode("/vocab").read()
        shf.close()
        Logger.debug("Done loading file..")
        return newsm
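A self-contained sketch of the PyTables round trip behind save() and load(); the temporary path and the single data array are assumptions kept small for illustration:

import os
import tempfile

import numpy as np
import tables

data = np.random.randn(4, 6)
path = os.path.join(tempfile.gettempdir(), "semantic_model_demo.hf5")  # hypothetical target path

# Write, mirroring save(): one array node per attribute.
with tables.open_file(path, mode="w", title="SemanticModel") as shf:
    shf.create_array("/", "data", data)

# Read back, mirroring load().
with tables.open_file(path) as shf:
    restored = shf.root.data.read()

print(np.allclose(data, restored))  # True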
Example #18
    def __call__(self, inputs):

        # prepare analyses
        analyses = [WebGL()]
        group_analyses = [Mean(), WebGLGroup()]

        # render
        render = Render()

        # parse request
        Logger.info("Parsing request arguments ... [@performance]")
        begin_time = time.time()
        req = Request(**inputs)
        Logger.info("Parsing request arguments finished, time cost: " +
                    str(time.time() - begin_time) + "s [@performance]")

        # get the corresponding subjects
        subjects = getattr(Subjects, req.semantic_model)

        # do computation and analyses for each contrast
        output = []
        for contrast in req.contrasts:
            # create tasks
            tasks = [
                Task(req.name, sub, contrast, analyses) for sub in subjects
            ]

            # compute individuals in parallel
            ret = parallelize_tasks(tasks)

            # run in sequence
            # import time
            # ret = []
            # t_begin = time.time()
            # for t in tasks:
            #     begin = time.time()
            #     ret.append(t.run())
            #     log_info = 'In sequence each task finishes in {0} seconds'.format(time.time() - begin)
            #     Logger.debug(log_info)
            # log_info = 'In sequence all the tasks finished in {0} seconds'.format(time.time() - t_begin)

            # collect results
            sub_res, data = zip(*ret)
            sub_res = {
                sub.name:
                {k: v
                 for i in res for k, v in i.serialize().items()}
                for res, sub in zip(sub_res, subjects)
            }

            # execute group evaluation
            next_data = {'contrast_results': data}
            grp_res = {}
            for ga in group_analyses:
                begin_time = time.time()
                res = ga(req.name, subjects, contrast, **next_data)
                Logger.info(
                    "Executing group analysis {0} costs {1} s [@performance]".
                    format(clsname(ga), str(time.time() - begin_time)))
                if isinstance(res, Serializable):
                    grp_res.update(res.serialize())
                elif isinstance(res, dict):
                    next_data.update(res)

            output.append(render.render(contrast, grp_res, sub_res))

        ret = json.dumps(output)
        Logger.debug(ret)
        update_contrast_result(ret)

        return ret
Example #19
    def __mean__(self, subjects, do_perm, contrast_results):

        # Prepare volumes
        volumes = {}
        if self.mask_pred:
            for s, cr in zip(subjects, contrast_results):
                mask = s.voxels_predicted
                mask = cortex.Volume(mask, s.name, s.transform)
                s.data[mask.data == True] = -1
                if do_perm:
                    #FIXME which threshold?
                    cr.thresholded_contrast.data[mask.data == True] = -1
                    volumes[cr.subject] = s

        if do_perm:
            if not self.mask_pred:
                volumes = {
                    con_res.subject: con_res.threshold_05
                    for con_res in contrast_results
                }
            else:
                pass
        else:
            volumes = {
                con_res.subject: con_res
                for con_res in contrast_results
            }

        for v in volumes.values():
            v.data = np.nan_to_num(v.data)

        # Re-compute mask
        if self.recomputed_mask:
            # FIXME should it be s.predicted_mask_mni ?
            # s2 = [s.predicted_mask_MNI for s in subjects]
            s2 = [s.predicted_mask_mni for s in subjects]
            self.nan_mask = np.mean(np.stack(s2), axis=0)
            self.nan_mask = self.nan_mask >= 3 / 8.0
            np.save(MNI_MASK_FILE, self.nan_mask)

        mni_volumes = [
            cortex.mni.transform_to_mni(volumes[s.name],
                                        s.func_to_mni).get_data().T
            for s in subjects
        ]

        # Smooth
        if self.smooth is not None:
            Logger.debug("Smoothing with %f mm kernel.." % self.smooth)
            atlasim = nipy.load_image(FSL_DEFAULT_TEMPLATE)
            smoother = nipy.kernel_smooth.LinearFilter(atlasim.coordmap,
                                                       atlasim.shape,
                                                       self.smooth)

            new_mni_volumes = []
            for ml in mni_volumes:
                # Create nipy-style Image from volume
                ml_img = nipy.core.image.Image(ml.T, atlasim.coordmap)
                # Pass it through smoother
                sm_ml_img = smoother.smooth(ml_img)
                # Store the result
                new_mni_volumes.append(sm_ml_img.get_data().T)
            mni_volumes = new_mni_volumes

        # Mean values
        group_mean = np.mean(np.stack(mni_volumes), axis=0)
        group_mean[self.nan_mask == False] = np.nan
        un_nan = np.isnan(group_mean) * self.nan_mask
        group_mean[un_nan] = 0

        max_v_volume = 1 if do_perm or self.do_1pct else 2

        if self.do_1pct:
            th = np.percentile(group_mean[group_mean != 0], 90)
            group_mean = group_mean >= th

        mean_volume = cortex.Volume(group_mean,
                                    'MNI',
                                    'atlas',
                                    vmin=-max_v_volume,
                                    vmax=max_v_volume)

        sub_volumes = [
            cortex.Volume(vol,
                          'MNI',
                          'atlas',
                          vmin=-np.abs(vol).max(),
                          vmax=np.abs(vol).max()) for vol in mni_volumes
        ]

        for sub, vol in zip(subjects, sub_volumes):
            vol.data[sub.predicted_mask_mni == False] = np.nan

        return mean_volume, sub_volumes