Example #1
    def _delete_orientation(self):
        """
        Delete orientation metadata. Garbage orientation metadata can lead to
        severe mis-registration trouble.

        """

        # prepare for smart caching
        if self.scratch is None:
            self.scratch = self.output_dir
        if self.caching:
            cache_dir = os.path.join(self.scratch, 'cache_dir')
            if not os.path.exists(cache_dir):
                os.makedirs(cache_dir)
            mem = Memory(cachedir=cache_dir, verbose=5)
        else:
            mem = Memory(None, verbose=0)

        # deleteorient for func
        for attr in ['n_sessions', 'session_output_dirs']:
            if getattr(self, attr) is None:
                warnings.warn("'%s' attribute is None! Skipping" % attr)
                break
        else:
            self.func = [mem.cache(delete_orientation)(
                self.func[sess], self.session_output_dirs[sess])
                         for sess in range(self.n_sessions)]

        # deleteorient for anat
        if self.anat is not None:
            self.anat = mem.cache(delete_orientation)(
                self.anat, self.anat_output_dir)
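The snippet above toggles disk caching by pointing joblib's Memory either at a real cache directory or at None (a no-op cache). A minimal standalone sketch of that pattern, assuming made-up names (expensive_step, run):

import os
from joblib import Memory

def expensive_step(path):
    # stand-in for a costly preprocessing step
    return path.upper()

def run(inputs, scratch=None, caching=True):
    if caching and scratch is not None:
        cache_dir = os.path.join(scratch, 'cache_dir')
        os.makedirs(cache_dir, exist_ok=True)
        mem = Memory(cache_dir, verbose=5)
    else:
        mem = Memory(None, verbose=0)  # caching disabled; calls run normally
    return [mem.cache(expensive_step)(x) for x in inputs]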
Example #2
def set_option(physical_cache_path: str = None) -> None:
    """
    Set global options to the package.

    :param physical_cache_path: Caching across Python interpreter sessions
        can save a lot of time. This option allows on-disk caching when the
        path is specified.

        Specifying an empty string ("") switches disk-caching off explicitly.
        Use "." to specify the current working directory instead.
        Kaggle kernels "seemed to like" on disk caching as long as I didn't
        try to commit the notebook. Then things ended up with a Code: 0 error
        failing the publishing attempt. It may be the most convenient there to
        comment out a set_option("") while experimenting, and to uncomment it
        just before committing the kernel.

        Leaving it at the default of None leaves the options unchanged.
        (There's likely more to come.)
    :return: None
    """
    if physical_cache_path is not None:
        global _mem

        if physical_cache_path != "":
            _mem = Memory(physical_cache_path, verbose=0)
            digit_correlations._cached_get_digit_correlation_data = \
                _mem.cache(digit_correlations._uncached_get_digit_correlation_data)
            digit_entropy_distribution.cached_generate_sample = \
                _mem.cache(digit_entropy_distribution._uncached_generate_sample)
        else:
            _mem = None
            digit_correlations.cached = \
                digit_correlations._lru_cached_get_digit_correlation_data
            digit_entropy_distribution.cached_generate_sample = \
                digit_entropy_distribution._lru_cached_generate_sample
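A brief usage sketch of the option defined above; the call sites and the cache path are hypothetical:

set_option("digit_cache")  # cache results on disk, across interpreter sessions
set_option(".")            # cache in the current working directory
set_option("")             # switch disk caching off (fall back to lru_cache)
set_option()               # default None: leave the caching options unchanged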
Example #3
def load_adni_longitudinal_rs_fmri(dirname='ADNI_longitudinal_rs_fmri',
                                   prefix='wr*.nii'):
    """ Returns paths of ADNI rs-fMRI
    """

    # get file paths and description
    images, subject_paths, description = _get_subjects_and_description(
        base_dir=dirname, prefix='I[0-9]*')
    images = np.array(images)
    # get func files
    func_files = list(map(lambda x: _glob_subject_img(
        x, suffix='func/' + prefix, first_img=True),
                     subject_paths))
    func_files = np.array(func_files)

    # get motion files
    # motions = None
    motions = list(map(lambda x: _glob_subject_img(
        x, suffix='func/' + 'rp_*.txt', first_img=True), subject_paths))

    # get phenotype from csv
    dx = pd.read_csv(os.path.join(_get_data_base_dir('ADNI_csv'),
                                  'DXSUM_PDXCONV_ADNIALL.csv'))
    roster = pd.read_csv(os.path.join(_get_data_base_dir('ADNI_csv'),
                                      'ROSTER.csv'))
    df = description[description['Image_ID'].isin(images)]
    df = df.sort_values(by='Image_ID')
    dx_group = np.array(df['DX_Group'])
    subjects = np.array(df['Subject_ID'])
    exams = np.array(df['EXAM_DATE'])
    exams = [date(int(e[:4]), int(e[5:7]), int(e[8:])) for e in exams]

    # caching dataframe extraction functions
    CACHE_DIR = _get_cache_base_dir()
    cache_dir = os.path.join(CACHE_DIR, 'joblib', 'load_data_cache')
    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    memory = Memory(cachedir=cache_dir, verbose=0)

    def _get_ridsfmri(subjects):
        return [_ptid_to_rid(s, roster) for s in subjects]
    rids = np.array(memory.cache(_get_ridsfmri)(subjects))

    def _get_examdatesfmri(rids):
        return [_get_dx(rids[i], dx, exams[i], viscode=None, return_code=True)
                for i in range(len(rids))]

    exam_dates = np.array(memory.cache(_get_examdatesfmri)(rids))

    def _get_viscodesfmri(rids):
        return [_get_vcodes(rids[i], str(exam_dates[i]), dx)
                for i in range(len(rids))]
    viscodes = np.array(memory.cache(_get_viscodesfmri)(rids))
    vcodes, vcodes2 = viscodes[:, 0], viscodes[:, 1]

    return Bunch(func=func_files, dx_group=dx_group, exam_codes=vcodes,
                 exam_dates=exam_dates, exam_codes2=vcodes2,
                 motion=motions,
                 subjects=subjects, images=images)
Example #4
def main():
    idir = 'data/tweet_sent'
    location = './cache'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    bert_name = 'bert-base-uncased'
    memory = Memory(location, verbose=0)

    train_dataloader, validation_dataloader = memory.cache(prepare_input_data)(idir, bert_name)
    model = memory.cache(train_model)(train_dataloader,  bert_name, device)
    model.eval()
    estimate_model(model, validation_dataloader)
Example #5
    def _niigz2nii(self):
        """
        Convert .nii.gz to .nii (crucial for SPM).

        """

        cache_dir = os.path.join(self.output_dir, 'cache_dir')
        mem = Memory(cache_dir, verbose=100)

        self.func = mem.cache(do_niigz2nii)(self.func,
                                            output_dir=self.output_dir)
        if self.anat is not None:
            self.anat = mem.cache(do_niigz2nii)(self.anat,
                                                output_dir=self.output_dir)
Example #6
def load_adni_longitudinal_hippocampus_volume():
    """ Returns longitudinal hippocampus measures
    """

    BASE_DIR = _get_data_base_dir('ADNI_csv')

    roster = pd.read_csv(os.path.join(BASE_DIR, 'ROSTER.csv'))
    dx = pd.read_csv(os.path.join(BASE_DIR, 'DXSUM_PDXCONV_ADNIALL.csv'))
    fs = pd.read_csv(os.path.join(BASE_DIR, 'UCSFFSX51_05_20_15.csv'))

    # extract hippocampus numerical values
    column_idx = np.arange(131, 147)
    cols = ['ST' + str(c) + 'HS' for c in column_idx]
    hipp = fs[cols].values
    idx_num = np.array([~np.isnan(h).all() for h in hipp])
    hipp = hipp[idx_num, :]

    # extract roster id
    rids = fs['RID'].values[idx_num]

    # caching dataframe extraction functions
    CACHE_DIR = _get_cache_base_dir()
    cache_dir = os.path.join(CACHE_DIR, 'joblib', 'load_data_cache')
    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    memory = Memory(cachedir=cache_dir, verbose=0)

    # get subject id
    def _getptidshippo(rids):
        return [_rid_to_ptid(rid, roster) for rid in rids]
    ptids = memory.cache(_getptidshippo)(rids)

    # extract exam date
    exams = fs['EXAMDATE'].values[idx_num]
    vcodes = fs['VISCODE'].values[idx_num]
    vcodes2 = fs['VISCODE2'].values[idx_num]
    exams = list(map(
        lambda e: date(int(e[:4]), int(e[5:7]), int(e[8:])), exams))
    exams = np.array(exams)

    # extract diagnosis
    def _getdxhippo(rids, exams):
        return np.array(list(map(_get_dx, rids, [dx]*len(rids), exams)))
    dx_ind = memory.cache(_getdxhippo)(rids, exams)
    dx_group = DX_LIST[dx_ind]

    return Bunch(dx_group=np.array(dx_group), subjects=np.array(ptids),
                 hipp=np.array(hipp), exam_dates=np.array(exams),
                 exam_codes=np.array(vcodes), exam_codes2=np.array(vcodes2))
Example #7
    def test_multioutput(self):
        cache = Memory(location=tempfile.gettempdir())
        cached_func = cache.cache(sklearn.datasets.make_regression)
        X, Y = cached_func(n_samples=250,
                           n_features=20,
                           n_informative=9,
                           n_targets=4,
                           bias=0.5,
                           effective_rank=10,
                           tail_strength=0.4,
                           noise=0.3,
                           shuffle=True,
                           coef=False,
                           random_state=1)
        X_train = X[:200, :]
        Y_train = Y[:200, :]
        X_test = X[200:, :]
        Y_test = Y[200:, :]

        data = {
            'X_train': X_train,
            'Y_train': Y_train,
            'X_test': X_test,
            'Y_test': Y_test
        }

        dataset_properties = {'multioutput': True}
        cs = SimpleRegressionPipeline(dataset_properties=dataset_properties).\
            get_hyperparameter_search_space()
        self._test_configurations(cs,
                                  data=data,
                                  dataset_properties=dataset_properties)
Example #8
    def __init__(
        self,
        gmm_ubm,
        feature=None, cache=False
    ):

        super(SpeakerIdentification, self).__init__()

        self.gmm_ubm = gmm_ubm

        # default features for speaker identification are MFCC
        # 13 coefs + delta coefs  + delta delta coefs
        #          + delta energy + delta delta energy
        if feature is None:
            from pyannote.feature.yaafe import YaafeMFCC
            feature = YaafeMFCC(
                e=False, De=True, DDe=True,
                coefs=13, D=True, DD=True
            )
        self.feature = feature

        if cache:

            # initialize cache
            from joblib import Memory
            from tempfile import mkdtemp
            memory = Memory(cachedir=mkdtemp(), verbose=0)

            # cache feature extraction method
            self.get_features = memory.cache(self.get_features)
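Several snippets here rebind a bound method to its memoized version at construction time. A minimal, self-contained sketch of that pattern (class and method names are made up):

from tempfile import mkdtemp
from joblib import Memory

class FeatureExtractor:
    def __init__(self, cache=False):
        if cache:
            memory = Memory(mkdtemp(), verbose=0)
            # Replace the bound method with its disk-cached version for this
            # instance only; other instances stay uncached.
            self.get_features = memory.cache(self.get_features)

    def get_features(self, path):
        # stand-in for an expensive feature-extraction step
        return len(path)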
Example #9
def pipeline(input_csv, output_csv, n_cores=1, cache="/tmp"):
    """
    Find first commit hash of appearing identifier in file.

    :param input_csv: Path to input csv.
    :param output_csv: Path to store result csv.
    :param n_cores: How many cores to use.
    :param cache: Cache location. If empty, caching is disabled.
    """
    if cache:
        memory = Memory(cache, verbose=0)
        parallel_comp = memory.cache(func=_parallel_comp)
    else:
        parallel_comp = _parallel_comp
    df = pd.read_csv(input_csv, header=None)
    df.columns = COLUMNS

    args = [
        tuple(getattr(line, col) for col in COLUMNS)
        for i, line in df.iterrows()
    ]
    res = Parallel(n_jobs=n_cores)(tqdm(
        (delayed(parallel_comp)(arg) for arg in args), total=len(args) - 1))

    new_args = [arg + (h, filename) for arg, (h, filename) in zip(args, res)]
    to_df = defaultdict(list)
    for arg in new_args:
        for col in NEW_COLUMNS:
            to_df[col].append(arg[NEW_COL2IND[col]])
    new_df = pd.DataFrame.from_dict(to_df)
    new_df = new_df[NEW_COLUMNS]
    new_df.to_csv(output_csv, index=False, header=False, compression="gzip")
Example #10
def parse_all(basedir, mem: Memory = None, with_paths=False, from_cache=False):
    _parse = parse if mem is None else mem.cache(parse, ignore=['from_cache'])

    for f in Path(basedir).rglob("*.html"):
        text = _parse(f, from_cache)
        if text is not None:
            yield (f, text) if with_paths else text
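Example #10 passes ignore=['from_cache'] so that the flag does not take part in the cache key. A minimal sketch of joblib's ignore option with a hypothetical function:

from joblib import Memory

mem = Memory('./cachedir', verbose=0)

@mem.cache(ignore=['verbose'])
def slow_square(x, verbose=False):
    if verbose:
        print('computing', x)
    return x * x

# Both calls share one cache entry because 'verbose' is not part of the key.
slow_square(3)
slow_square(3, verbose=True)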
Example #11
    def test_multilabel(self):
        cache = Memory(location=tempfile.gettempdir())
        cached_func = cache.cache(
            sklearn.datasets.make_multilabel_classification
        )
        X, Y = cached_func(
            n_samples=150,
            n_features=20,
            n_classes=5,
            n_labels=2,
            length=50,
            allow_unlabeled=True,
            sparse=False,
            return_indicator=True,
            return_distributions=False,
            random_state=1
        )
        X_train = X[:100, :]
        Y_train = Y[:100, :]
        X_test = X[101:, :]
        Y_test = Y[101:, ]

        data = {'X_train': X_train, 'Y_train': Y_train,
                'X_test': X_test, 'Y_test': Y_test}

        dataset_properties = {'multilabel': True}
        cs = SimpleClassificationPipeline(dataset_properties=dataset_properties).\
            get_hyperparameter_search_space()
        self._test_configurations(configurations_space=cs, data=data)
Example #12
def test_store_standard_types(capsys, tmpdir, compress, arg):
    """Test that standard types can be cached in s3fs store."""
    def func(arg):
        """Dummy function."""
        print("executing function")
        return arg

    register_s3fs_store_backend()

    mem = Memory(location=tmpdir.strpath,
                 backend='s3',
                 verbose=0,
                 compress=compress,
                 backend_options=dict(bucket="test"))

    assert mem.store_backend.location == os.path.join("s3:/", tmpdir.strpath,
                                                      "joblib")

    cached_func = mem.cache(func)
    result = cached_func(arg)

    assert result == arg

    out, err = capsys.readouterr()
    assert out == "executing function\n"
    assert not err

    # Second call should also return the cached result
    result2 = cached_func(arg)

    assert result2 == arg

    out, err = capsys.readouterr()
    assert not out
    assert not err
Example #13
def test_clear_cache(capsys, tmpdir):
    """Check clearing the cache."""
    def func(arg):
        """Dummy function."""
        print("executing function")
        return arg

    register_s3fs_store_backend()

    mem = Memory(location=tmpdir.strpath,
                 backend='s3',
                 verbose=0,
                 backend_options=dict(bucket="test"))
    cached_func = mem.cache(func)
    cached_func("test")

    out, _ = capsys.readouterr()
    assert out == "executing function\n"

    mem.clear()

    cached_func("test")
    out, _ = capsys.readouterr()
    assert out == "executing function\n"

    mem.clear()
    print(mem.store_backend.location)
    assert not os.listdir(mem.store_backend.location)
Example #14
def cacheSetup():
	global rollDiceCached
	global memory
	location = './.cache'
	memory = Memory(location, verbose=0)
	rng = np.random.RandomState(42)
	rollDiceCached = memory.cache(rollDice)
Example #15
def extract_group_components(subject_components, variances,
                             ccs_threshold=None, n_group_components=None,
                             cachedir=None):
    # Use asarray to cast to a non memmapped array
    subject_components = np.asarray(subject_components)
    if len(subject_components) == 1:
        # We are in a single subject case
        return subject_components[0, :n_group_components].T, \
                variances[0][:n_group_components]

    # The group components (concatenated subject components)
    group_components = subject_components.T
    group_components = np.reshape(group_components,
                                    (group_components.shape[0], -1))
    # Save memory
    del subject_components

    # Inter-subject CCA
    memory = Memory(cachedir=cachedir, mmap_mode='r')
    svd = memory.cache(linalg.svd)
    cca_maps, ccs, _ = svd(group_components, full_matrices=False)
    # Save memory
    del group_components
    if n_group_components is None:
        n_group_components = np.argmin(ccs > ccs_threshold)
    cca_maps = cca_maps[:, :n_group_components]
    ccs = ccs[:n_group_components]
    return cca_maps, ccs
Example #16
def test_get_cache_items(tmpdir):
    """Test cache items listing."""
    def func(arg):
        """Dummy function."""
        return arg

    register_hdfs_store_backend()

    mem = Memory(location=tmpdir.strpath,
                 host=__namenode__,
                 backend='hdfs',
                 user='******',
                 verbose=100,
                 compress=False)
    assert not mem.store.get_cache_items()

    cached_func = mem.cache(func)
    for arg in ["test1", "test2", "test3"]:
        cached_func(arg)

    # three cached items should now be listed
    assert len(mem.store.get_cache_items()) == 3

    mem.clear()
    assert not mem.store.get_cache_items()
Example #17
    def __init__(
        self,
        hmm=None,
        n_components=16, covariance_type='diag',
        min_duration=0.250,
        feature=None, cache=False
    ):

        super(SpeechActivityDetection, self).__init__()

        self.hmm = hmm
        self.hmm.min_duration = min_duration

        # default features for speech activity detection
        # are MFCC (12 coefficients + delta coefficient + delta energy)
        if feature is None:
            from pyannote.feature.yaafe import YaafeMFCC
            feature = YaafeMFCC(e=False, coefs=12, De=True, D=True)
        self.feature = feature

        if cache:

            # initialize cache
            from joblib import Memory
            from tempfile import mkdtemp
            memory = Memory(cachedir=mkdtemp(), verbose=0)

            # cache feature extraction method
            self.get_features = memory.cache(self.get_features)
Example #18
def parallel_func(fcn, n_jobs=-1, verbose=None, total=None, mesg=None,
                  cache_dir=None, **kwargs):
    """Get an instance of parallel and delayed function.

    This function is inspired by MNE's one.

    Parameters
    ----------
    fcn : callable
        A function.
    n_jobs : int
        Number of jobs to run in parallel.
    total : int | None
        If int, use a progress bar to display the progress of dispatched
        jobs. This should only be used when directly iterating, not when
        using ``split_list`` or :func:`np.array_split`.
        If None (default), do not add a progress bar.
    mesg : string | None
        Message to display on the progress bar.
    cache_dir : string | None
        If this is the path to an existing directory, the computations are
        cached there.
    kwargs : dict | {}
        Additional arguments are sent to the joblib.Parallel function.

    Returns
    -------
    parallel: instance of joblib.Parallel or list
        The parallel object.
    my_func: callable
        ``fcn`` if not run in parallel, otherwise ``delayed(fcn)``.
    """
    from frites.config import CONFIG
    # set_log_level(verbose)

    # manually merge inputs inside the default config
    for k, v in CONFIG["JOBLIB_CFG"].copy().items():
        kwargs[k] = v
    # verbosity level of joblib
    kwargs['verbose'] = 1 if verbose in ['debug', True] else 0

    # caching option
    if isinstance(cache_dir, str) and os.path.isdir(cache_dir):
        logger.info(f'Caching computations to {cache_dir}')
        memory = Memory(cache_dir, verbose=kwargs['verbose'])
        fcn = memory.cache(fcn)

    # parallel functions
    para_fcn = delayed(fcn)
    parallel = Parallel(n_jobs, **kwargs)

    if total is not None:
        def parallel_progress(op_iter):
            return parallel(ProgressBar(iterable=op_iter, max_value=total,
                                        mesg=mesg))
        parallel_out = parallel_progress
    else:
        parallel_out = parallel

    return parallel_out, para_fcn
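A short usage sketch of parallel_func as documented above; the worker function and the cache directory are hypothetical:

def square(x):
    return x * x

# Returns a Parallel-like callable and the delayed worker; results are cached
# on disk only if './cache' already exists.
parallel, p_square = parallel_func(square, n_jobs=2, total=10,
                                   mesg='squaring', cache_dir='./cache')
results = parallel(p_square(i) for i in range(10))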
Example #19
def run_dmri_pipeline(subject_session, do_topup=True, do_edc=True):
    subject, session = subject_session
    data_dir = os.path.join(source_dir,  subject, session, 'dwi')
    write_dir = os.path.join(derivatives_dir, subject, session)
    dwi_dir = os.path.join(write_dir, 'dwi')
    # Apply topup to the images
    input_imgs = sorted(glob.glob('%s/sub*.nii.gz' % data_dir))
    dc_imgs = sorted(glob.glob(os.path.join(dwi_dir, 'dcsub*run*.nii.gz')))
    mem = Memory('/neurospin/tmp/bthirion/cache_dir')
    if len(dc_imgs) < len(input_imgs):
        se_maps = [
            os.path.join(source_dir, subject, session, 'fmap',
                         '%s_%s_dir-ap_epi.nii.gz' % (subject, session)),
            os.path.join(source_dir, subject, session, 'fmap',
                         '%s_%s_dir-pa_epi.nii.gz' % (subject, session))]

        if do_topup:
            fsl_topup(se_maps, input_imgs, mem, write_dir, 'dwi')

    # Then proceed with Eddy current correction
    # get the images
    dc_imgs = sorted(glob.glob(os.path.join(dwi_dir, 'dc*run*.nii.gz')))
    dc_img = os.path.join(dwi_dir, 'dc%s_%s_dwi.nii.gz' % (subject, session))
    concat_images(dc_imgs, dc_img)

    # get the bvals/bvec
    file_bvals = sorted(glob.glob('%s/sub*.bval' % data_dir))
    bvals = np.concatenate([np.loadtxt(fbval) for fbval in sorted(file_bvals)])
    bvals_file = os.path.join(dwi_dir, 'dc%s_%s_dwi.bval' % (subject, session))
    np.savetxt(bvals_file, bvals)
    file_bvecs = sorted(glob.glob('%s/sub*.bvec' % data_dir))
    bvecs = np.hstack([np.loadtxt(fbvec) for fbvec in sorted(file_bvecs)])
    bvecs_file = os.path.join(dwi_dir, 'dc%s_%s_dwi.bvec' % (subject, session))
    np.savetxt(bvecs_file, bvecs)

    # Get eddy-preprocessed images
    # eddy_img = nib.load(glob.glob(os.path.join(dwi_dir, 'eddc*.nii*'))[-1])

    # Get eddy-preprocessed images
    eddy_img = mem.cache(eddy_current_correction)(
        dc_img, bvals_file, bvecs_file, dwi_dir, mem)

    # load the data
    gtab = gradient_table(bvals, bvecs, b0_threshold=10)
    # Create a brain mask

    from dipy.segment.mask import median_otsu
    b0_mask, mask = median_otsu(eddy_img.get_data(), 2, 1)
    if subject == 'sub-13':
        from nilearn.masking import compute_epi_mask
        from nilearn.image import index_img
        imgs_ = [index_img(eddy_img, i)
                 for i in range(len(bvals)) if bvals[i] < 50]
        mask_img = compute_epi_mask(imgs_, upper_cutoff=.8)
        mask_img.to_filename('/tmp/mask.nii.gz')
        mask = mask_img.get_data()
    # do the tractography
    streamlines = tractography(eddy_img, gtab, mask, dwi_dir)
    return streamlines
Example #20
def main():
    np.random.seed(10)
    n = 100
    m = 100
    sampling = 0.1
    eps = 1
    x_sampler = Sampler(mean=np.array([[1.], [2], [3]]),
                        cov=np.array([[[.1]], [[.1]], [[.1]]]),
                        p=np.ones(3) / 3)
    y_sampler = Sampler(mean=np.array([[0.], [3], [5]]),
                        cov=np.array([[[.1]], [[.1]], [[.4]]]),
                        p=np.ones(3) / 3)

    x = x_sampler(n)
    y = y_sampler(m)

    mem = Memory(expanduser('~/cache'), verbose=0)

    fref, gref, refrecords = mem.cache(sinkhorn)(x,
                                                 y,
                                                 n_iter=1000,
                                                 sampling=1.,
                                                 eps=eps)

    for pin_potential in [False]:
        for avg_step_size in ['1/sqrt(t)']:
            f, g, records = mem.cache(sinkhorn)(x,
                                                y,
                                                n_iter=int(1e6),
                                                record_every=1000,
                                                step_size='1/sqrt(t)',
                                                avg_step_size=avg_step_size,
                                                sampling=sampling,
                                                pin_potential=pin_potential,
                                                eps=eps)
            norm = var_norm(refrecords['f'][-1][None, :] - records['f'],
                            axis=1)

            plt.plot(range(len(norm)),
                     norm,
                     label=f'pin_pot{pin_potential} avg{avg_step_size}')
    plt.xscale('log')
    # plt.yscale('log')
    plt.legend()

    plt.show()
Example #21
def multiprocess_with_cache(inputs):
    """
    Run the computation in multiple processes sharing one on-disk cache.
    """
    location = "./cachedir"
    memory = Memory(location, verbose=0)
    f_memoized = memory.cache(f)
    return Parallel(n_jobs=-1)(delayed(f_memoized)(x) for x in inputs)
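When the cached results are large NumPy arrays, joblib can also memory-map them back instead of re-reading full copies. A minimal sketch; the cache path and the array-producing function are made up:

import numpy as np
from joblib import Memory, Parallel, delayed

# mmap_mode='r' loads cached ndarray results as read-only memory maps, so
# worker processes share pages rather than duplicating the array.
memory = Memory('./cachedir', mmap_mode='r', verbose=0)

@memory.cache
def make_big_array(n):
    return np.arange(n, dtype=np.float64)

big = make_big_array(1_000_000)
chunk_sums = Parallel(n_jobs=2)(
    delayed(np.sum)(chunk) for chunk in np.array_split(big, 4))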
Example #22
def run(aspect, word2vec, trained_model, gpu, out, test, batchsize,
        sparsity_coef, coherent_coef, dependent, order):
    """
    Train "Rationalizing Neural Predictions" for one specified aspect.

    Please refer README.md for details.
    """
    memory = Memory(cachedir='.', verbose=1)
    if os.path.splitext(test)[-1] == '.json':
        w2v, vocab, _, _, test_dataset = \
            memory.cache(prepare_data)(None, word2vec, aspect, annotation=test)
    elif os.path.splitext(test)[-1] == '.gz':
        w2v, vocab, test_dataset, _, _ = \
            memory.cache(prepare_data)(test, word2vec, aspect)
    else:
        raise ValueError(
            "Input 'test' must be either json file or gzipped text file with"
            " appropriate extension."
        )

    encoder = rationale.models.Encoder(
        w2v.shape[1], order, 200, 2, dropout=0.1
    )
    generator_cls = (rationale.models.GeneratorDependent
                     if dependent else rationale.models.Generator)
    # Original impl. uses two layers to model bi-directional LSTM
    generator = generator_cls(w2v.shape[1], order, 200, dropout=0.1)
    model = rationale.models.RationalizedRegressor(
        generator, encoder, w2v.shape[0], w2v.shape[1], initialEmb=w2v,
        dropout_emb=0.1,
        sparsity_coef=sparsity_coef, coherent_coef=coherent_coef
    )
    # Resume from a snapshot
    chainer.serializers.load_npz(trained_model, model)

    if gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    inv_vocab = {v: k for k, v in vocab.items()}
    results = rationale.prediction.test(model, test_dataset, inv_vocab,
                                        device=gpu, batchsize=batchsize)
    with open(os.path.join(out, 'output.json'), 'w') as fout:
        json.dump(results, fout)
Example #23
def load_adni_longitudinal_csf_biomarker():
    """ Returns longitudinal csf measures
    """
    BASE_DIR = _get_data_base_dir('ADNI_csv')
    roster = pd.read_csv(os.path.join(BASE_DIR, 'ROSTER.csv'))
    dx = pd.read_csv(os.path.join(BASE_DIR, 'DXSUM_PDXCONV_ADNIALL.csv'))
    csf_files = ['UPENNBIOMK.csv', 'UPENNBIOMK2.csv', 'UPENNBIOMK3.csv',
                 'UPENNBIOMK4_09_06_12.csv', 'UPENNBIOMK5_10_31_13.csv',
                 'UPENNBIOMK6_07_02_13.csv', 'UPENNBIOMK7.csv',
                 'UPENNBIOMK8.csv']
    cols = ['RID', 'VISCODE', 'ABETA', 'PTAU', 'TAU']
    # 3,4,5,7,8
    csf = pd.DataFrame()
    for csf_file in csf_files[2:]:
        fs = pd.read_csv(os.path.join(BASE_DIR, csf_file))
        csf = csf.append(fs[cols])

    # remove nans from csf values
    biom = csf[cols[2:]].values
    idx = np.array([~np.isnan(v).any() for v in biom])
    biom = biom[idx]
    # get phenotype
    vcodes = csf['VISCODE'].values[idx]
    rids = csf['RID'].values[idx]

    # caching dataframe extraction functions
    CACHE_DIR = _get_cache_base_dir()
    cache_dir = os.path.join(CACHE_DIR, 'joblib', 'load_data_cache')
    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    memory = Memory(cachedir=cache_dir, verbose=0)

    def _getptidscsf(rids):
        return list(map(lambda x: _rid_to_ptid(x, roster), rids))
    ptids = memory.cache(_getptidscsf)(rids)

    # get diagnosis
    def _getdxcsf(rids, vcodes):
        return list(map(lambda x, y: DX_LIST[_get_dx(x, dx, viscode=y)],
                   rids, vcodes))
    dx_group = memory.cache(_getdxcsf)(rids, vcodes)

    return Bunch(dx_group=np.array(dx_group), subjects=np.array(ptids),
                 csf=np.array(biom), exam_codes=np.array(vcodes),
                 exam_codes2=np.array(vcodes))
Example #24
 def construct_and_attach_filename_data(self):
     synsets = self.synset_list
     num_per_synset = self.data['num_per_synset']
     seed = self.data['seed']
     folder = self.local_home('PrecomputedDicts')
     mem = Memory(folder)
     compute_filename_dict = mem.cache(self.compute_filename_dict)
     filenames, filenames_dict = compute_filename_dict(synsets, num_per_synset, seed)
     self.filenames_dict = filenames_dict
Example #25
 def construct_and_attach_filename_data(self):
     synsets = self.synset_list
     num_per_synset = self.data['num_per_synset']
     seed = self.data['seed']
     folder = self.local_home('PrecomputedDicts')
     mem = Memory(folder)
     compute_filename_dict = mem.cache(self.compute_filename_dict)
     filenames, filenames_dict = compute_filename_dict(synsets, num_per_synset, seed)
     self.filenames_dict = filenames_dict
Example #26
    def __init__(self, data_source='yahoo'):
        ''' Creates the object and sets up caching.

        :param data_source: A data source such as `yahoo` or `google`.
        '''
        self.data_source = data_source
        # caching
        memory = Memory(cachedir='.')
        self.get = memory.cache(self.get)
Example #27
 def get_multilabel(self):
     cache = Memory(location=tempfile.gettempdir())
     cached_func = cache.cache(make_multilabel_classification)
     return cached_func(n_samples=100,
                        n_features=10,
                        n_classes=5,
                        n_labels=5,
                        return_indicator=True,
                        random_state=1)
Example #28
def add_caching_to_funcs(obj, funcNames):
	mem = Memory('../.add_caching_to_funcs', verbose=11)
	if obj is None or funcNames is None:
		return
	if isScalar(funcNames):
		funcNames = [funcNames]
	for name in funcNames:
		func = getattr(obj, name, None)
		if func is not None:
			setattr(obj, name, mem.cache(func))
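A hypothetical usage of add_caching_to_funcs above: patch a selected method of an object with a disk-cached version. The loader class and its method are invented for illustration:

from time import sleep

class DatasetLoader:                                  # hypothetical stand-in
    def load_features(self):
        sleep(2)                                      # pretend this is expensive
        return [[0.0, 1.0], [1.0, 0.0]]

loader = DatasetLoader()
add_caching_to_funcs(loader, ['load_features'])       # patch the method in place
features = loader.load_features()                     # slow: computed, then cached
features_again = loader.load_features()               # fast: read back from cache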
Example #29
    def _forward(self, im, indices):
        memory = Memory(location=self.memory, verbose=0)
        _apply_transform_cached = memory.cache(_apply_xform)

        logger.info('Applying forward transformations in pipeline')
        for xform in self.xforms:
            im = _apply_transform_cached(xform, im, indices, False)
        logger.info('All forward transformations applied')

        return im
Example #30
def add_caching_to_funcs(obj, funcNames):
    mem = Memory('../.add_caching_to_funcs', verbose=11)
    if obj is None or funcNames is None:
        return
    if isScalar(funcNames):
        funcNames = [funcNames]
    for name in funcNames:
        func = getattr(obj, name, None)
        if func is not None:
            setattr(obj, name, mem.cache(func))
Example #31
    def __init__(self):

        self.name = self.__class__.__name__

        try:
            from joblib import Memory
            mem = Memory(cachedir=self.home('cache'), verbose=False)
            self._get_meta = mem.cache(self._get_meta)
        except ImportError:
            pass
Example #32
def load_adni_longitudinal_mmse_score():
    """ Returns longitudinal mmse scores
    """
    BASE_DIR = _get_data_base_dir('ADNI_csv')
    roster = pd.read_csv(os.path.join(BASE_DIR, 'ROSTER.csv'))
    dx = pd.read_csv(os.path.join(BASE_DIR, 'DXSUM_PDXCONV_ADNIALL.csv'))
    fs = pd.read_csv(os.path.join(BASE_DIR, 'MMSE.csv'))

    # extract nans free mmse
    mmse = fs['MMSCORE'].values
    idx_num = fs['MMSCORE'].notnull().values
    mmse = mmse[idx_num]

    # extract roster id
    rids = fs['RID'].values[idx_num]

    # caching dataframe extraction functions
    CACHE_DIR = _get_cache_base_dir()
    cache_dir = os.path.join(CACHE_DIR, 'joblib', 'load_data_cache')
    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    memory = Memory(cachedir=cache_dir, verbose=0)

    def _getptidsmmse(rids):
        return [_rid_to_ptid(rid, roster) for rid in rids]

    # get subject id
    ptids = memory.cache(_getptidsmmse)(rids)
    # extract visit code (don't use EXAMDATE ; null for GO/2)
    vcodes = fs['VISCODE'].values
    vcodes = vcodes[idx_num]
    vcodes2 = fs['VISCODE2'].values
    vcodes2 = vcodes2[idx_num]

    def _getdxmmse(rids, vcodes2):
        return list(map(
            lambda x, y: DX_LIST[_get_dx(x, dx, viscode=y)], rids, vcodes2))

    # get diagnosis
    dx_group = memory.cache(_getdxmmse)(rids, vcodes2)

    return Bunch(dx_group=np.array(dx_group), subjects=np.array(ptids),
                 mmse=mmse, exam_codes=vcodes, exam_codes2=vcodes2)
Example #33
    def _adjoint(self, im, indices):
        memory = Memory(location=self.memory, verbose=0)
        _apply_transform_cached = memory.cache(_apply_xform)

        logger.info('Applying adjoint transformations in pipeline')
        for xform in self.xforms[::-1]:
            im = _apply_transform_cached(xform, im, indices, True)
        logger.info('All adjoint transformations applied')

        return im
Example #34
    def _run_suject_level1_glm(subject_data_dir, subject_output_dir,
                               **kwargs):
        """
        Just another wrapper.

        """

        mem = Memory(os.path.join(subject_output_dir, "cache_dir"))
        return mem.cache(run_suject_level1_glm)(subject_data_dir,
                                                subject_output_dir,
                                                **kwargs)
Example #35
    def __init__(self, meta=None):
        if meta is not None:
            self._meta = meta

        self.name = self.__class__.__name__

        try:
            from joblib import Memory
            mem = Memory(cachedir=self.home('cache'))
            self._get_meta = mem.cache(self._get_meta)
        except ImportError:
            pass
Example #36
File: utils.py Project: fliem/cpr
def load_features_cached(cache_dir,
                         participant_label,
                         feature_base_dir,
                         clinical_feature_file,
                         modality='clinical'):
    memory = Memory(cache_dir / "load", verbose=0)

    allowed_modalities = [
        "clinical", "structural", "structGlobScort", "functional", "fullcon"
    ]
    modality_parts = modality.split("+")
    for m in modality_parts:
        if m not in allowed_modalities:
            raise ValueError(m, modality)

    features, feature_files = memory.cache(stack_subjects_features)(
        participant_label, feature_base_dir, clinical_feature_file, modality)

    stacked_features = memory.cache(concat_modality_features)(features,
                                                              modality)
    return stacked_features
Example #37
def single(n_players, n_actions, n_matrices, _seed, conditioning, skewness,
           l1_penalty, gaussian_noise, stochastic_noise, _run):
    mem = Memory(location=expanduser('~/cache'))
    H = mem.cache(make_positive_matrices)(n_players, n_actions, n_matrices,
                                          conditioning, skewness,
                                          stochastic_noise, _seed)
    game = MatrixGame(H, l1_penalty=l1_penalty, gaussian_noise=gaussian_noise)

    values, policies = compute_nash(game)

    _run.info['policies'] = policies.tolist()
    _run.info['values'] = values.tolist()
Example #38
def getagreement(tpl,datadir,task_type='all'):
    """Get agreement values for annotators in the :data:'tpl' list

    Args:
       tpl (list):  combination group of annotators
       datadir (str): Cache data directory used by joblib

    Returns:
       namedtuple defined as ``Agree = collections.namedtuple('Agree', ['kappa', 'alpha','avg_ao'], verbose=True)``
    """

    mem = Memory(cachedir=datadir)
    readjson = mem.cache(json2taskdata.readjson, mmap_mode='r')
    create_task_data = mem.cache(json2taskdata.create_task_data)
    count_occurrances = mem.cache(json2taskdata.count_occurrances)
    count_labels = mem.cache(json2taskdata.count_labels)

    annotators = set()
    lectask = []
    # -------------------------------------------------------------------------
    # for each annotator in group tpl
    # -------------------------------------------------------------------------

    for stditem in tpl:
        aname = stditem.split('.')[0][3:][-2:]
        annotators.add(aname)
        lecdict = readjson(stditem)
        newlectask = create_task_data(lecdict, task_type=task_type,
                                      annotator=aname)
        label_data = json2taskdata.create_labels_list(newlectask)
        abscount = count_occurrances(str(label_data))
        yaml.dump(abscount,
                  open(os.path.join(datadir, 'abscount-' + aname + '.yaml'), 'w'))

        setcount = count_labels(newlectask)
        yaml.dump(setcount,
                  open(os.path.join(datadir, 'setcount-' + aname + '.yaml'), 'w'))

        lectask = lectask + newlectask

    task = AnnotationTask(data=lectask,
                          distance=nltk.metrics.distance.masi_distance_mod)

    return {frozenset(annotators): Agree(task.kappa(), task.alpha(), task.avg_Ao())}
Example #39
def make_dictionary(X,
                    n_components=20,
                    alpha=5.,
                    write_dir='/tmp/',
                    contrasts=[],
                    method='multitask',
                    l1_ratio=.5,
                    n_subjects=13):
    """Create dictionary + encoding"""
    from sklearn.decomposition import dict_learning_online, sparse_encode
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import MultiTaskLasso, MultiTaskElasticNet

    mem = Memory(write_dir, verbose=0)
    dictionary = mem.cache(initial_dictionary)(n_components, X)
    np.savez(os.path.join(write_dir, 'dictionary.npz'),
             loadings=dictionary,
             contrasts=contrasts)
    if method == 'online':
        components, dictionary = dict_learning_online(X.T,
                                                      n_components,
                                                      alpha=alpha,
                                                      dict_init=dictionary,
                                                      batch_size=200,
                                                      method='cd',
                                                      return_code=True,
                                                      shuffle=True,
                                                      n_jobs=1,
                                                      positive_code=True)
        np.savez(os.path.join(write_dir, 'dictionary.npz'),
                 loadings=dictionary,
                 contrasts=contrasts)
    elif method == 'sparse':
        components = sparse_encode(X.T,
                                   dictionary,
                                   alpha=alpha,
                                   max_iter=10,
                                   n_jobs=1,
                                   check_input=True,
                                   verbose=0,
                                   positive=True)
    elif method == 'multitask':
        # too many hard-coded parameters!
        n_voxels = X.shape[1] // n_subjects
        components = np.zeros((X.shape[1], n_components))
        clf = MultiTaskLasso(alpha=alpha)
        clf = MultiTaskElasticNet(alpha=alpha, l1_ratio=l1_ratio)
        for i in range(n_voxels):
            x = X[:, i:i + n_subjects * n_voxels:n_voxels]
            components[i: i + n_subjects * n_voxels: n_voxels] =\
                clf.fit(dictionary.T, x).coef_
    return dictionary, components
Example #40
    def init_phd_lab(cls, directory: phd_lab_directory) -> None:
        if cls.phd_lab_directory is not None:
            raise RuntimeError("Cannot initialize PhdLabWrapper "
                               "a second time!")
        cls.phd_lab_directory = directory

        memcache = os.path.join(cls.phd_lab_directory, 'memcache')
        if not os.path.exists(memcache):
            os.makedirs(memcache)
        memory = Memory(".memcache", verbose=True)

        global fit_with_cache
        fit_with_cache = memory.cache(fit_with_cache)
Example #41
def main():
##    subsdir=r'E:\elan projects\L2\submissions\extracted'
##    dstdir=os.path.join(subsdir,r'passed')
##    copypassedfiles(dstdir,subsdir)
    dstdir = r'E:\elan projects\L2\resubmission\full'
    import glob
    jsonflist = glob.glob(dstdir + '\\' + r'*.379.json')

    mem = Memory(cachedir=dstdir)
    json2agreementmatrix_cached = mem.cache(json2agreementmatrix)

    c = json2agreementmatrix_cached(jsonflist, task_type='all')
    print(c)
Example #42
File: solver.py Project: amoliu/lfd
 def __init__(self, use_cache=True, cachedir=None):
     """Inits TpsSolverFactory
     
     Args:
         use_cache: whether to cache solver matrices in file
         cache_dir: cached directory. if not specified, the .cache directory in parent directory of top-level package is used.
     """
     if use_cache:
         if cachedir is None:
             # .cache directory in parent directory of top-level package
             cachedir = os.path.join(__import__(__name__.split('.')[0]).__path__[0], os.path.pardir, ".cache")
         memory = Memory(cachedir=cachedir, verbose=0)
         self.get_solver_mats = memory.cache(self.get_solver_mats)
Example #43
def _load_data(root_dir="/",
               data_set="ds107",
               cache_dir="/volatile/storage/workspace/parietal_retreat/" +
               "covariance_learn/cache/",
               n_jobs=1):
    from joblib import Memory
    mem = Memory(cachedir=cache_dir)
    load_data_ = mem.cache(setup_data_paths.run)

    df = setup_data_paths.get_all_paths(root_dir=root_dir, data_set=data_set)
    # region_signals = joblib.load(os.path.join(root_dir, dump_file))
    region_signals = load_data_(root_dir=root_dir, data_set=data_set,
                                n_jobs=n_jobs,
                                dump_dir=os.path.join(cache_dir, data_set))
    return df, region_signals
Example #44
    def _delete_orientation(self):
        """
        Delete orientation metadata. Garbage orientation metadata can lead to
        severe mis-registration trouble.

        """

        # prepare for smart caching
        cache_dir = os.path.join(self.output_dir, 'cache_dir')
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        mem = Memory(cachedir=cache_dir, verbose=5)

        # deleteorient for func
        self.func = [mem.cache(delete_orientation)(
                self.func[j],
                self.tmp_output_dir,
                output_tag=self.session_id[j])
                     for j in range(len(self.session_id))]

        # deleteorient for anat
        if self.anat is not None:
            self.anat = mem.cache(delete_orientation)(
                self.anat, self.tmp_output_dir)
Example #45
  def __init__(self, caching=False):
    """Create a new CompatIdFetcher object.

    Args:
      caching: Whether to cache setup from run to run. See
        PrebuiltCompatibilityTest.CACHING for details.
    """
    self.compat_ids = None
    if caching:
      # This import occurs here rather than at the top of the file because we
      # don't want to force developers to install joblib. The caching argument
      # is only set to True if PrebuiltCompatibilityTest.CACHING is hand-edited
      # (for testing purposes).
      # pylint: disable=import-error
      from joblib import Memory
      memory = Memory(cachedir=tempfile.gettempdir(), verbose=0)
      self.FetchCompatIds = memory.cache(self.FetchCompatIds)
Example #46
def ica_step(group_maps, group_variance, cachedir=None):
    memory = Memory(cachedir=cachedir, mmap_mode='r')
    # We do a spatial ICA: the arrays are transposed in the following,
    # axis1 = component, and axis2 is voxel number.

    _, ica_maps = memory.cache(fastica)(group_maps.T, whiten=False)

    # Project the ICAs on the group maps to give a 'cross-subject
    # reproducibility' score.
    proj = np.dot(ica_maps, group_maps)
    reproducibility_score = (np.abs(proj)*group_variance).sum(axis=-1)

    order = np.argsort(reproducibility_score)[::-1]

    ica_maps = ica_maps[order, :]

    return ica_maps.T
Example #47
    class _DiskCache(object):

        cached_methods = methods

        def __init__(self, *args, **kwargs):
            from tempfile import mkdtemp
            from joblib import Memory
            self.cachedir = cachedir or mkdtemp()

            self.memory = Memory(cachedir=self.cachedir)
            for method in self.cached_methods:
                setattr(self, method, self.memory.cache(getattr(self, method)))

            if not os.path.isdir(self.cachedir):
                raise OSError("Non-existent directory: ", self.cachedir)

            super(_DiskCache, self).__init__(*args, **kwargs)
Example #48
    def __init__(self, meta=None, seed=0, ntrain=15, ntest=15, num_splits=10):

        self.seed = seed
        self.ntrain = ntrain
        self.ntest = ntest
        self.num_splits = num_splits

        if meta is not None:
            self._meta = meta

        self.name = self.__class__.__name__

        try:
            from joblib import Memory
            mem = Memory(cachedir=self.home('cache'))
            self._get_meta = mem.cache(self._get_meta)
        except ImportError:
            pass
Example #49
    def __init__(self, root,
                 filter_species_ids=None,
                 required_attributes=None,
                 transform=None,
                 is_training=False,
                 cachedir=CACHE_DIR):
        super(GogglesDataset, self).__init__()

        mem = Memory(cachedir)
        metadata_loader = mem.cache(self._load_metadata)

        self.is_training = is_training
        self._data_dir = root

        required_species, \
            self.attributes, \
            self._image_data = metadata_loader(root)  # _load_metadata(root) cached

        if filter_species_ids is not None:
            assert type(filter_species_ids) is list
            filter_species_ids = set(filter_species_ids)
            required_species = list(filter(lambda s: s.id in filter_species_ids, required_species))
            self._image_data = list(filter(lambda d: d.species.id in filter_species_ids, self._image_data))
        self._species_labels = {species: label for label, species in enumerate(required_species)}

        if is_training is not None:
            self._image_data = list(filter(
                lambda d: d.is_for_training == is_training,
                self._image_data))

        if required_attributes is not None:
            assert type(required_attributes) is list
            self.attributes = required_attributes
        elif filter_species_ids is not None:
            attributes = set()
            for species in required_species:
                attributes = attributes.union(species.attributes)
            self.attributes = list(sorted(attributes, key=lambda a: a.id))
        self.num_attributes = len(self.attributes)

        if transform is not None:
            self._transform = transform
        else:
            self._transform = transforms.Compose([transforms.ToTensor()])
Example #50
    def __init__(self, meta=None, seed=0, ntrain=10, ntest=10, num_splits=5):

        self.seed = seed
        self.ntrain = ntrain
        self.ntest = ntest
        self.num_splits = num_splits
        self.names = ["Face", "Body", "Object"]

        if meta is not None:
            self._meta = meta

        self.name = self.__class__.__name__

        try:
            from joblib import Memory

            mem = Memory(cachedir=self.home("cache"))
            self._get_meta = mem.cache(self._get_meta)
        except ImportError:
            pass
Example #51
    def test_cached(self):
        try:
            from joblib import Memory
            mem = Memory(self.cache_dir)
            dep_tree = {
                'a': 5,
                'b': 6,
                'c': mem.cache(slow_func),
            }
            data = Pipeline(dep_tree)
            t0 = time.time()
            data.resolve()
            delta = time.time() - t0

            t0 = time.time()
            data.resolve()
            delta = time.time() - t0
            assert delta < .1
        except ImportError:
            pass
Example #52
def compute_confidence_par(allLearners, dada):

    lab_confidence = np.zeros([dada.shape[0], len(allLearners)])
    tic = time.time()
    #import ipdb;ipdb.set_trace()
    print('producing weighted outputs IN PARALLEL')
    
    mem = Memory(cachedir='tmp')
    classif_RBF2 = mem.cache(confidence_par)
    
    c = l_c[0]
    r = Parallel(n_jobs=N_JOBS)(delayed(confidence_par)(allLearners,ii,dada) for ii in enumerate(allLearners))
    res, iis = zip(*r)
    
    for t,y in enumerate(iis):
        lab_confidence[:,y] = res[t]
    
    print "time taken to produce confidence:", round(time.time() - tic,2), "seconds"
    #import ipdb;ipdb.set_trace()
    return lab_confidence
Example #53
    def __init__(
        self,
        segmentation=None,
        duration=1., step=0.1, gap=0., threshold=0.,
        feature=None, cache=False
    ):

        super(SpeechTurnSegmentation, self).__init__()

        if segmentation is None:

            self.segmentation = SegmentationGaussianDivergence(
                duration=duration, step=step, gap=gap,
                threshold=threshold
            )

        else:

            self.segmentation = segmentation

        # default features for segmentation
        # are MFCC (energy + 12 coefficients)
        if feature is None:
            from pyannote.feature.yaafe import YaafeMFCC
            feature = YaafeMFCC(
                e=True, De=False, DDe=False,
                coefs=12, D=False, DD=False
            )
        self.feature = feature

        if cache:

            # initialize cache
            from joblib import Memory
            from tempfile import mkdtemp
            memory = Memory(cachedir=mkdtemp(), verbose=0)

            # cache feature extraction method
            self.get_features = memory.cache(self.get_features)
Example #54
def svm_cla_sklearn_feat_sel(features_train, features_test, labels_train, labels_test):
    from sklearn.feature_selection import SelectPercentile, SelectKBest, f_classif, RFECV
    from sklearn.cross_validation import StratifiedKFold
    from sklearn.metrics import zero_one_loss
    
    features_train = sp.array(features_train, dtype='uint8')
    features_test = sp.array(features_test, dtype='uint8')

    print("zscore features")
    tic = time.time()
    features_train, mean_f, std_f = features_preprocessing(features_train)
    features_test, mean_f, std_f = features_preprocessing(features_test, mean_f, std_f)
    print("time taken to zscore data is:", round(time.time() - tic), "seconds")
    
    featSize = np.shape(features_train)
    selector = LinearSVC(C=0.0007, penalty="l1", dual=False).fit(features_train, labels_train)

    print('Starting with %d samp, %d feats, keeping %d' % (featSize[0], featSize[1], (np.shape(selector.transform(features_train)))[1]))
    print('classifying')
    
    features_train = selector.transform(features_train)
    features_test = selector.transform(features_test)
    #import ipdb; ipdb.set_trace()
    mem = Memory(cachedir='tmp')
    classif_RBF2 = mem.cache(classif_RBF)

    c = l_c[0]
    Parallel(n_jobs=8)(delayed(classif_RBF2)(features_train, features_test, labels_train, labels_test, g, c) for g in l_g)
    #import ipdb; ipdb.set_trace()

    print "Starting CONTROL classification for c = ", c
    tic = time.time()
    clf = SVC(C=c)
    clf.fit(features_train, labels_train) #[:1960][:]
    score = clf.score(features_test, labels_test) #[:13841][:]
    print "selected CONTROL score for c = ", c, "is: ", score
    print "time taken:", time.time() - tic, "seconds"
Example #55
test_func = mem.cache(test_func)

Parallel(n_jobs=1)(delayed(test_func)(i) for i in [a, a, a])

Parallel(n_jobs=2)(delayed(test_func)(i) for i in [a, a, a])

### Can use with latest version on github
from joblib import Parallel, delayed
import numpy as np
a = np.memmap('/tmp/memmaped', dtype=np.float32, mode='w+', shape=(3, 5))
b = np.memmap('/tmp/memmaped', dtype=np.float32, mode='r', shape=(3, 5))
Parallel(n_jobs=2)(delayed(np.mean)(x) for x in np.array_split(b, 3))

cachedir2 = mkdtemp()
memory2 = Memory(cachedir=cachedir2, mmap_mode='r')
square = memory2.cache(np.square)
a = np.vander(np.arange(3)).astype(float)
square(a)


import joblib
import numpy as np
testarray = {}
for i in range(5):
    testarray[i] = convert2memmap(np.array(range(500*100)))
filepath = "/tmp/test.joblib"
res = joblib.dump(testarray, filepath)
testarray = joblib.load(filepath, mmap_mode="r+")


for key in testarray:
Example #56
 def cfunc(*fargs, **fkwargs):
     return Memory.cache(self, func, *args, **kwargs).__call__(*fargs, **fkwargs)
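For context, a minimal reconstruction (assumed, not the original class) of the kind of wrapper this final fragment belongs to: a Memory subclass whose cache method returns a plain closure around the memoized call.

from joblib import Memory

class LazyMemory(Memory):              # hypothetical subclass name
    def cache(self, func=None, *args, **kwargs):
        def cfunc(*fargs, **fkwargs):
            # Build the memoized function with the parent implementation and
            # call it immediately with the runtime arguments.
            return Memory.cache(self, func, *args, **kwargs).__call__(*fargs, **fkwargs)
        return cfunc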