# Imports reconstructed from usage; the save/load/repartition helpers are
# assumed to live in pySparkUtils.utils (their module is not shown in the source),
# and `eng` is assumed to be a pytest fixture providing the Spark engine.
import os
import logging

import numpy as np
import pytest
import thunder as td
import matplotlib.pyplot as plt
from pyspark.rdd import RDD

from pySparkUtils.utils import (save_rdd_as_pickle, load_rdd_from_pickle,
                                balanced_repartition)

logger = logging.getLogger(__name__)


def test_type(eng, tmpdir):
    # an invalid return_type should raise a descriptive ValueError
    data = td.series.fromrandom(engine=eng)
    path = os.path.join(tmpdir.dirname, 'test0')
    save_rdd_as_pickle(data, path)
    with pytest.raises(ValueError) as excinfo:
        _ = load_rdd_from_pickle(eng, path, return_type='error')
    assert 'return_type not' in str(excinfo.value)


def test_overwrite_false(eng, tmpdir):
    # saving to an existing path without overwrite=True should raise an IOError
    data = td.images.fromrandom(engine=eng)
    path = os.path.join(tmpdir.dirname, 'test5')
    save_rdd_as_pickle(data, path)
    with pytest.raises(IOError) as excinfo:
        save_rdd_as_pickle(data, path)
    assert 'already exists and overwrite is false' in str(excinfo.value)


def test_overwrite_true(eng, tmpdir):
    # overwrite=True should replace the saved data and round-trip losslessly
    data = td.images.fromrandom(engine=eng)
    path = os.path.join(tmpdir.dirname, 'test4')
    save_rdd_as_pickle(data, path)
    save_rdd_as_pickle(data, path, overwrite=True)
    reloaded = load_rdd_from_pickle(eng, path)
    data_local = data.toarray()
    reloaded_local = reloaded.toarray()
    assert np.allclose(data_local, reloaded_local)
    assert data_local.dtype == reloaded_local.dtype
    assert reloaded.npartitions() == eng.defaultParallelism


def test_rdd(eng, tmpdir):
    # return_type='rdd' should give back a plain pyspark RDD
    data = eng.range(100)
    path = os.path.join(tmpdir.dirname, 'test3')
    save_rdd_as_pickle(data, path)
    reloaded = load_rdd_from_pickle(eng, path, return_type='rdd')
    data_local = np.array(sorted(data.collect()))
    reloaded_local = np.array(sorted(reloaded.collect()))
    assert isinstance(reloaded, RDD)
    assert np.allclose(data_local, reloaded_local)
    assert data_local.dtype == reloaded_local.dtype
    assert reloaded.getNumPartitions() == eng.defaultParallelism


def test_images(eng, tmpdir):
    # the default return type should reconstruct a thunder Images object
    data = td.images.fromrandom(engine=eng)
    path = os.path.join(tmpdir.dirname, 'test2')
    save_rdd_as_pickle(data, path)
    reloaded = load_rdd_from_pickle(eng, path)
    data_local = data.toarray()
    reloaded_local = reloaded.toarray()
    assert isinstance(reloaded, td.images.Images)
    assert np.allclose(data_local, reloaded_local)
    assert data_local.dtype == reloaded_local.dtype
    assert reloaded.npartitions() == eng.defaultParallelism
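

# A minimal round-trip sketch of the helpers the tests above exercise. The
# function name is hypothetical (not part of the original module); `eng` is a
# Spark engine and `path` any writable directory.
def example_roundtrip(eng, path):
    data = td.images.fromrandom(engine=eng)                    # random Images to persist
    save_rdd_as_pickle(data, path, overwrite=True)             # one pickle file per partition
    images = load_rdd_from_pickle(eng, path)                   # default return_type gives Images back
    raw = load_rdd_from_pickle(eng, path, return_type='rdd')   # or ask for the plain pyspark RDD
    return images, raw
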
def saveRegData(session,
                regData,
                overwrite=False,
                base='/nrs/svoboda/moharb/New/',
                return_name=False):
    """ saves registered data Images object

    :param session: SpineSession object
    :param regData: thunder Images object
    :param overwrite: if True will overwrite files
    :param base: base path to save to
    """
    name = base + session.animalID + '_' + session.date + session.run + 'RegBinaryPickle'
    save_rdd_as_pickle(regData, name, overwrite=overwrite)
    if return_name:
        return name
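
# Illustrative call (session attributes are assumptions inferred from the path
# construction above):
#
#   path = saveRegData(session, regData, overwrite=True, return_name=True)
#   # -> '/nrs/svoboda/moharb/New/<animalID>_<date><run>RegBinaryPickle'
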
def loadData(sc,
             session,
             cutoff=None,
             saveBinary=True,
             start=None,
             stop=None,
             xStart=2,
             xStop=70,
             overwrite=False,
             timepoints=None,
             repartition=True,
             cutoff_fallback=40,
             zoom=None,
             return_clean_path=False,
             binary_path=None,
             **kwargs):
    """ Loads raw data, and return clean after cropping using a cutoff to get rig of electrical noise and correcting for
    bi-directional phase offset

    :param sc: Spark Context
    :param session: SpineSession object
    :param cutoff: cutoff value. if None will try to infer from flyback frames
    :param saveBinary: if True will save to nrs as binary with one file per partition
    :param start: start file to load
    :param stop: last file to load
    :param xStart: first rows to crop
    :param xStop: last rows to crop
    :param overwrite: if true will overwrite the binary saved
    :param timepoints: how many timepoint to use to estimate the bi-directional phase offset
    :param repartition: if True will repartition the data into sc.defaultParallelism * 2 for better performance
    :param cutoff_fallback: if cutoff is None and there are no flyback frames will use this value
    :param zoom: zoom
    :param binary_path: base path to save binary file
    :param kwargs: to be passed to initBase
    :return: raw data Images object, clean Images object
    """
    session.initBase(sc, **kwargs)
    data = session.loadRawData(sc=sc, start=start, stop=stop)
    if repartition:
        data = balanced_repartition(data, sc.defaultParallelism * 2)
    # infer the cutoff from the flyback frames when it is not given explicitly:
    # the threshold is the median frame mean plus 2.5 median frame STDs
    if cutoff is None:
        if not hasattr(session, 'fieldMaskBool') or len(
                np.where(session.fieldMaskBool == 0)[0]) == 0:
            cutoff = cutoff_fallback
            logger.info('Setting cutoff at %d, No flyback frames' %
                        cutoff_fallback)
        else:
            data.cache()
            data.count()
            flyback = np.where(session.fieldMaskBool == 0)[0]
            flybackData = data[:, :, :, flyback]
            flybackDataMean = flybackData.map(np.mean).toarray()
            flybackDataSTD = flybackData.map(np.std).toarray()
            m = np.median(flybackDataMean).astype(float)
            s = np.median(flybackDataSTD).astype(float)
            cutoff = m + 2.5 * s
            logger.info('Mean: %.2f, STD: %.2f, Cutoff: %.2f' % (m, s, cutoff))
    session.display()
    plt.show()
    if timepoints is None:
        timepoints = data.shape[0]
    # build the cleaning pipeline: crop -> cutoff clean -> (optional zoom) -> phase correction -> crop
    session.addPipeline('Clean')
    session.addStep('Clean', 'crop1', 'cropDataStep', planes=session.fieldMask)
    session.addStep('Clean', 'clean', 'cleanDataStep', cutoff=cutoff)
    if zoom is not None:
        session.addStep('Clean', 'zoom', 'zoomDataStep', zoom=zoom)
    session.addStep('Clean',
                    'scanPhase',
                    'scanPhaseStep',
                    timepoints=timepoints)
    session.addStep('Clean',
                    'crop2',
                    'cropDataStep',
                    xStart=xStart,
                    xStop=xStop)
    clean = session.doPipeline(pipeline='Clean', data=data)
    # materialize the clean data and release the cached raw data
    clean.cache()
    clean.count()
    data.uncache()
    if binary_path is None:
        name = '/nrs/svoboda/moharb/New/'
    else:
        name = binary_path
    name = name + session.animalID + '_' + session.date + session.run + 'CleanBinaryPickle'
    if saveBinary:
        save_rdd_as_pickle(clean, name, overwrite=overwrite)
    if return_clean_path:
        return data, clean, name
    else:
        return data, clean
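

# Typical call sketch (argument values are illustrative, not from the source):
#
#   raw, clean = loadData(sc, session, start=0, stop=100, zoom=2.0)
#   raw, clean, path = loadData(sc, session, saveBinary=True, overwrite=True,
#                               return_clean_path=True)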