Example #1
def tqdm(iterable, *args, enabled=None, **kwargs):
    # Fall back to the global toggle when no explicit flag was passed.
    if enabled is None:
        enabled = set_tqdm_enabled.value
    from tqdm import tqdm as tqdm_
    if enabled:
        return tqdm_(iterable, *args, **kwargs)
    # Progress bar disabled: hand back the iterable unchanged.
    return iterable
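
A minimal usage sketch; set_tqdm_enabled is assumed here to be a module-level holder whose .value attribute stores the global default (the real object is not shown in the example):

import types

# Hypothetical stand-in for the global toggle used by the wrapper above.
set_tqdm_enabled = types.SimpleNamespace(value=True)

for _ in tqdm(range(1000)):                  # wrapped: shows a progress bar
    pass
for _ in tqdm(range(1000), enabled=False):   # disabled: plain iterable
    pass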
Example #2
from tqdm import tqdm as tqdm_  # import missing from the original snippet

def tqdm(*args, mininterval=5.0, **kwargs):
    return tqdm_(*args, mininterval=mininterval, **kwargs)
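
This raises tqdm's default refresh interval from 0.1 s to 5 s, which keeps log files quiet. For instance:

for _ in tqdm(range(10_000_000)):                   # redraws at most every 5 s
    pass
for _ in tqdm(range(10_000_000), mininterval=0.5):  # caller override
    pass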
Example #3
def get_contrasts(fmri_img: Union[str, PathLike,
                                  PosixPath, Nifti1Image],
                  events: Union[str, PathLike,
                                PosixPath, pd.DataFrame],
                  desc: str = 'effect_size',
                  design_kws: Union[dict, Bunch] = None,
                  glm_kws: Union[dict, Bunch] = None,
                  masker_kws: Union[dict, Bunch] = None,
                  standardize: bool = True,
                  scale: bool = False,
                  scale_between: tuple = (0, 1),
                  maximize: bool = False,
                  masker: Union[MultiNiftiMasker, NiftiLabelsMasker,
                                NiftiMapsMasker, NiftiMasker] = None,
                  feature_labels: Union[Sequence, pd.Index] = None,
                  session=None,
                  **kwargs
                  ) -> Bunch:
    """
    Return dict-like structure containing experimental contrasts.


    Using ``nilearn.glm.first_level.FirstLevel`` object,
    contrasts are first computed trial-wise. Then, the same is done
    for each experimental condition in ``trial_type_cols`` if a
    list of string is provided.

    Args:
        fmri_img: str, PathLike, PosixPath or Nifti1Image
            In-memory ``nibabel.nifti1.Nifti1Image`` or a path
            pointing to one.

        events: str, PathLike, PosixPath or DataFrame
            In-memory ``pandas.DataFrame`` or a path pointing to one.

        desc: str (Default = 'effect_size')
            String passed to the ``output_type`` parameter of
            ``nilearn.glm.first_level.FirstLevelModel.compute_contrast``.

        design_kws: dict or Bunch (Default = None)
            Dict-like mapping of keyword arguments passed to
            ``nilearn.glm.first_level.make_first_level_design_matrix``.
            If a ``session`` object is passed in the parameters,
            the value under the corresponding key is used.

        glm_kws: dict or Bunch (Default = None)
            Dict-like mapping of keyword arguments passed to
            ``nilearn.glm.first_level.FirstLevelModel.__init__``.
            If a ``session`` object is passed in the parameters,
            the value under the corresponding key is used.

        masker_kws: dict or Bunch (Default = None)
            Dict-like mapping of keyword arguments passed to
            ``masker.__init__``.
            If a ``session`` object is passed in the parameters,
            the value under the corresponding key is used.

        standardize: bool (Default = True)
            If true (by default), the extracted brain signals are
            standardized using a ``sklearn.preprocessing.StandardScaler``
            object (demeaning and scaling to unit variance). It is generally
            advised to standardize data for machine-learning operations.
            See notes for documentation, tutorials and more.

        scale: bool (Default = False)
            If true, the extracted brain signals are
            scaled (between 0 and 1 by default) using a
            ``sklearn.preprocessing.MinMaxScaler`` object. It is generally
            advised to standardize data for machine-learning operations.
            See notes for documentation, tutorials and more.

        scale_between: tuple (Default = (0, 1))
            Values between which the signal should be scaled.
            Default is (0, 1) - left = min, right = max.
            Only used if ``scale`` parameter is True.

        maximize: bool (Default = False)
            If true, scale each feature by its maximum absolute value.
            From the docs of ``sklearn.preprocessing.MaxAbsScaler``:
                '[...] Scales and translates each feature individually
                such that the maximal absolute value of each feature in
                training set is 1.0. Does not shift/center the data,
                and thus does not destroy any sparsity.'

        masker: MultiNiftiMasker, NiftiLabelsMasker,
                NiftiMapsMasker or NiftiMasker (Default = None)
            Masker object from the ``nilearn.input_data`` module meant
            to perform brain signal extraction (conversion from 4D or 3D
            image to 2D data).
            If omitted, a NiftiMasker built with ``masker_kws``
            and fitted to ``fmri_img`` is used.

        feature_labels: List or pd.Index (Default = None)
            List of feature names used as columns for the brain signal matrix.
            Number of labels and number of features must match.
            An error is raised otherwise.

        session: dict or Bunch (Default = None)
            Dict-like structure containing all required and/or optional
            parameters. The functions ``fetch_fmriprep_session`` and
            ``get_fmri_session`` from ``cimaq_decoding_utils``
            return a ``session`` object. It is similar to the return
            values of ``nilearn.datasets.fetch{dataset_name}`` functions.

    Returns: ``sklearn.utils.Bunch``
        Dict-like structure with the following keys:
        ['model', 'contrast_img', 'signals', 'feature_labels']

    Notes:
        https://scikit-learn.org/stable/modules/preprocessing.html#preprocessing
    """

    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import (MaxAbsScaler, MinMaxScaler,
                                       StandardScaler)
    from tqdm import tqdm as tqdm_
    from cimaq_decoding_utils import get_frame_times, get_t_r

    # Parameter initialization
    design_defs, glm_defs = {}, {}
    fmri_img = nimage.load_img(fmri_img)
    if not isinstance(events, pd.DataFrame):
        # Tab-separated for '.tsv'-like extensions, comma-separated otherwise.
        sep = '\t' if splitext(str(events))[1].startswith('.t') else ','
        events = pd.read_csv(events, sep=sep)
    if session is not None:
        design_defs.update(session.design_defs)
        glm_defs.update(session.glm_defs)

    t_r, frame_times = get_t_r(fmri_img), get_frame_times(fmri_img)

    if design_kws is not None:
        design_defs.update(design_kws)
    if glm_kws is not None:
        glm_defs.update(glm_kws)

    # GLM initialization and contrast computation
    design = make_first_level_design_matrix(frame_times, events=events.iloc[1:, :],
                                            **design_defs)

    model = FirstLevelModel(**glm_defs).fit(run_imgs=fmri_img,
                                            design_matrices=design.iloc[:, 1:])
    contrasts = nimage.concat_imgs([model.compute_contrast(
                    trial, output_type=desc) for trial in
                    tqdm_(design.columns[:-1].astype(str),
                          ncols=100,
                          desc='Computing Contrasts')])

    # Optional post-extraction scaling pipeline: keep only the steps
    # whose flag is True, in this fixed order.
    pipe_components = ((standardize, 'standardize', StandardScaler()),
                       (maximize, 'maximize', MaxAbsScaler()),
                       (scale, 'scale', MinMaxScaler(scale_between)))

    pipe_components = [item[1:] for item in pipe_components if item[0]]
    # Fall back to a default NiftiMasker when none is provided.
    if masker is None:
        from nilearn.input_data import NiftiMasker
        masker_defs = dict(masker_kws) if masker_kws is not None else {}
        masker = NiftiMasker(**masker_defs).fit(fmri_img)
    signals = masker.transform_single_imgs(contrasts)
    if pipe_components:
        pipeline = Pipeline(pipe_components)
        signals = pipeline.fit_transform(signals)
    signals = pd.DataFrame(signals,
                           index=design.iloc[:, :-1].columns)

    if feature_labels is not None:
        signals = signals.set_axis(feature_labels, axis=1)

    return Bunch(model=model, contrast_img=contrasts,
                 signals=signals, feature_labels=feature_labels)
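
A hypothetical call, assuming a preprocessed BOLD run and its events file; the file names, the smoothing value and the hrf_model setting below are illustrative, not taken from the source:

from nilearn.input_data import NiftiMasker

# Illustrative inputs; any fitted nilearn masker works here.
masker = NiftiMasker(smoothing_fwhm=8).fit('sub-01_task-memory_bold.nii.gz')
results = get_contrasts('sub-01_task-memory_bold.nii.gz',
                        'sub-01_task-memory_events.tsv',
                        masker=masker,
                        design_kws={'hrf_model': 'spm'})
print(results.signals.shape)  # one row per trial-wise contrast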
Example #4
    def parse(self, process=None, batch=200, remove_after=False):
        """
        Write a serie of compressed dict files to disk. Structure :
        for each page : 
            - page_len          : the xml lenght of the page
            - content           : all the words, lower case, no ponct
            - ns                : namespace
            - redirect          : if the page redirect, where to
            - meta              : the metadata of pages (infobox...)
        """
        with open(self.work_directory+BLOCKLIST_PATH) as f:
            blocklist = json.load(f)
 
        # Declared global so that multiprocessing can pickle it by name.
        global worker
        def worker(t):
            pages, blocklist = t
            parser = PageParser()
            pages = pages.split('<page>')
            dic = {}
            redirect = {}
            n_skip = 0
            for page in pages:
                try:
                    title = rx_title.search(page)[0]
                    if not self.__is_ok(title, blocklist):
                        n_skip += 1
                        continue
                except TypeError:
                    if len(page) != 2: # real parsing errors
                        print(page)
                    continue
                if '<redirect' not in page:

                    # xml parse
                    namespace = int(rx_nm.search(page)[0])
                    text = page.split('<text', 1)[1].split('>', 1)[1].split('</text>')[0]

                    # wikicode parse
                    data = parser.parse(text)
                    data['ns'] = namespace
                    dic[title] = data
                    dic[title]['page_len'] = len(page)
                else:
                    to_page = page.split('<redirect title="', 1)[1].split('" />', 1)[0]
                    redirect[title] = {'redirect': to_page}
                    redirect[title]['page_len'] = len(page)
            return dic, redirect, n_skip
        
        print('[DumpParser] Start parsing...')
        g = self.generate_chunks()
        with mp.Pool(process) as p:
            bar = tqdm_(total=self.total)
            buff = {}
            redirect = {}
            params = ((i, blocklist) for i in g)
            i = 0
            n_buf, n_red = 0, 0
            for rep, red, n_skip in p.imap_unordered(worker, params):
                bar.update(len(rep)+len(red)+n_skip)
                buff.update(rep)
                redirect.update(red)
                if i%(batch//2) == 0:
                    size = dict_size(buff)/1024**2
                    if size > batch:
                        filename = 'pages_{}'.format(n_buf)
                        self.__write_to_disk(buff, filename)
                        buff.clear()
                        n_buf += 1
                if i%(batch*2) == 0:
                    size = dict_size(redirect)/1024**2
                    if size > batch:
                        filename = 'redirect_{}'.format(n_red)
                        self.__write_to_disk(redirect, filename)
                        redirect.clear()
                        n_red += 1
                i += 1
            filename = 'pages_{}'.format(n_buf)
            self.__write_to_disk(buff, filename)
            filename = 'redirect_{}'.format(n_red)
            self.__write_to_disk(redirect, filename)

        if remove_after:
            os.remove(self.path_data)
            os.remove(self.path_index)
            print('[DumpParser] Files removed')
        self.thread.join()
        print('[DumpParser] Parsing to dictionary finished!')
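
The flush logic compares dict_size(buff)/1024**2 against batch, so batch doubles as a size threshold in MiB. A minimal sketch of what the unshown dict_size helper might look like, plus a hypothetical driver call (the DumpParser constructor is also not shown):

import sys

def dict_size(d):
    # Assumption: rough in-memory footprint of a dict, in bytes.
    # The real helper used by parse() is not shown in the example.
    return sys.getsizeof(d) + sum(sys.getsizeof(k) + sys.getsizeof(v)
                                  for k, v in d.items())

# Hypothetical usage, with illustrative constructor arguments:
# parser = DumpParser(work_directory='dumps/')
# parser.parse(process=8, batch=200, remove_after=True)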