Code Example #1
File: continuous.py Project: kghose/neurapy
import pylab  # pylab re-exports the numpy namespace, including memmap
from scipy.signal import filtfilt


def filtfiltlong(finname,
                 foutname,
                 fmt,
                 b,
                 a,
                 buffer_len=100000,
                 overlap_len=100,
                 max_len=-1):
    """Use memmap and chunking to filter continuous data.
  Inputs:
    finname     - name of the input (raw data) file
    foutname    - name of the output (filtered data) file
    fmt         - data format, e.g. 'i'
    b,a         - filter coefficients
    buffer_len  - how much data to process at a time
    overlap_len - how much data do we add to the end of each chunk to smooth out filter transients
    max_len     - how many samples to process. If set to -1, processes the whole file
  Outputs:
    y           - The memmapped array pointing to the written file


  Notes on algorithm:
    1. The arrays are memmapped, so we let pylab (numpy) take care of handling large arrays
    2. The filtering is done in chunks:

    Chunking details:

                |<------- b1 ------->||<------- b2 ------->|
    -----[------*--------------{-----*------]--------------*------}----------
         |<-------------- c1 -------------->|
                               |<-------------- c2 -------------->|

    From the array of data we cut out contiguous buffers (b1,b2,...) and to each buffer we add some extra overlap to
    make chunks (c1,c2). The overlap helps to remove the transients from the filtering which would otherwise appear at
    each buffer boundary.

  """
    x = pylab.memmap(finname, dtype=fmt, mode='r')
    if max_len == -1:
        max_len = x.size
    y = pylab.memmap(foutname, dtype=fmt, mode='w+', shape=max_len)

    for buff_st_idx in range(0, max_len, buffer_len):
        chk_st_idx = max(0, buff_st_idx - overlap_len)
        buff_nd_idx = min(max_len, buff_st_idx + buffer_len)
        chk_nd_idx = min(x.size, buff_nd_idx + overlap_len)
        rel_st_idx = buff_st_idx - chk_st_idx
        rel_nd_idx = buff_nd_idx - chk_st_idx
        this_y_chk = filtfilt(b, a, x[chk_st_idx:chk_nd_idx])
        y[buff_st_idx:buff_nd_idx] = this_y_chk[rel_st_idx:rel_nd_idx]

    return y
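A hedged usage sketch for the function above: the file names, sampling rate, and filter design are hypothetical, and overlap_len is enlarged so it comfortably covers the filter's transient at each buffer boundary.

import pylab
from scipy.signal import butter

# Hypothetical filter: 4th-order Butterworth high-pass at 300 Hz for data
# sampled at 30 kHz (cutoff given as a fraction of the Nyquist frequency)
b, a = butter(4, 300.0 / (30000.0 / 2.0), btype='high')

# 'h' is the format code for int16; the .dat file names are placeholders
y = filtfiltlong('raw.dat', 'filtered.dat', 'h', b, a,
                 buffer_len=100000, overlap_len=1000)
print(y.size, y.dtype)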
Code Example #2
import os
import sys

import pylab  # pylab re-exports the numpy namespace, including memmap


# read_single_csc (used below) is defined in the same module; it is shown
# later on this page.
def read_all_csc(data_folder,
                 dtype='int16',
                 assume_same_fs=True,
                 memmap=False,
                 memmap_folder=None,
                 save_for_spikedetekt=False,
                 channels_to_save=None,
                 return_sliced_data=False):
    if sys.version_info[0] > 2:
        mode = 'br'
    else:
        mode = 'r'

    # List the .ncs files and order them by their CSC channel number
    files = [os.path.join(data_folder, f) for f in os.listdir(data_folder) if f.endswith('.ncs')]
    order = [int(file.split('.')[0].split('CSC')[1]) for file in files]
    sort_order = sorted(range(len(order)), key=order.__getitem__)
    ordered_files = [files[i] for i in sort_order]

    out_filename = os.path.basename(data_folder) + '.dat'
    if memmap:
        if not memmap_folder:
            raise NameError("A memmap_folder should be defined for memmapped data")
        out_full_filename = os.path.join(memmap_folder, out_filename)

    data = None
    i = 0
    for file in ordered_files:
        fin = open(file, mode=mode)
        x = read_single_csc(fin, assume_same_fs=assume_same_fs, memmap=memmap)
        if not assume_same_fs or memmap:
            channel_data = x['packets']['samp'].ravel()
            if data is None:
                # Allocate the output once we know the per-channel sample count
                data = pylab.memmap(out_full_filename, dtype=dtype, mode='w+',
                                    shape=(pylab.size(files), channel_data.size))
            data[i, :] = channel_data
            data.flush()
        else:
            channel_data = x['trace']
            if data is None:
                data = pylab.zeros(shape=(pylab.size(files), channel_data.size), dtype=dtype)
            data[i, :] = channel_data
        i += 1
        print(i)

    data_to_return = data
    if save_for_spikedetekt:
        if channels_to_save:
            data2 = data[channels_to_save, :]
            if return_sliced_data:
                data_to_return = data2
        else:
            data2 = data
        # Transpose to samples x channels and dump as a flat binary file
        data2 = pylab.transpose(data2)
        filename = os.path.join(memmap_folder, 'spikedetekt_' + out_filename)
        data2.astype(dtype).tofile(filename)

    return data_to_return
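A usage sketch under stated assumptions: the folder paths are placeholders, and with memmap=True the call returns a channels x samples pylab.memmap backed by a .dat file in memmap_folder, so the whole recording never has to fit in RAM.

data = read_all_csc('/data/B14R9',              # contains CSC1.ncs, CSC2.ncs, ...
                    dtype='int16',
                    assume_same_fs=False,
                    memmap=True,
                    memmap_folder='/scratch/memmaps')
print(data.shape)           # (number of .ncs files, samples per channel)
first_channel = data[0, :]  # rows follow the CSC channel numbering

Note that the output shape is fixed from the first channel read, so all .ncs files are assumed to hold the same number of samples.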
Code Example #3
File: lynxio.py Project: kghose/neurapy
import logging

import pylab  # pylab re-exports the numpy namespace, including memmap

logger = logging.getLogger(__name__)


def read_extracted_data(fname, type='addata'):
  """Reads data file extracted by extract_nrd.
  Inputs:
    fname - name of the file we want to read.
    type  - type of the data. Has to be one of 'ts','ttl' or 'addata'
      'ts' - time stamps which are uint64 and give values in microseconds
      'ttl' - the parallel port input which is uint32
      'addata' - the continuous A/D channel data which is int32
  Output:
    data - pylab array of appropriate type
  """
  if type == 'ts':
    fmt = 'Q'
  elif type == 'ttl':
    fmt = 'I'
  elif type == 'addata':
    fmt = 'i'
  else:
    logger.error('Unrecognized data type {:s}'.format(type))
    return None

  return pylab.memmap(fname, dtype=fmt, mode='r')
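The single-character codes used here are numpy/struct format characters, and the mapping is easy to sanity-check (this check is an aside, not part of the original module):

import pylab

for fmt in ('Q', 'I', 'i'):
    dt = pylab.dtype(fmt)
    print(fmt, dt.name, dt.itemsize)  # Q uint64 8, I uint32 4, i int32 4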
Code Example #4
# Script excerpt: pl is pylab, lio is the module providing read_all_csc
# (shown above), filters is a project-local filtering module, and the
# folder / sampling-rate variables are defined earlier in the script.
phases_all_shaftA = pl.load(os.path.join(memap_folder, 'phases_all_shaftA.npy'), mmap_mode=None)
phases_all_shaftC = pl.load(os.path.join(memap_folder, 'phases_all_shaftC.npy'), mmap_mode=None)

data = pl.load(os.path.join(memap_folder, 'B14R9_raw.npy'), mmap_mode='r+')


#----------Data generation-----------------
data = lio.read_all_csc(folder,
                        assume_same_fs=False,
                        memmap=True,
                        memmap_folder=memap_folder,
                        save_for_spikedetekt=False,
                        channels_to_save=None,
                        return_sliced_data=False)
pl.save(os.path.join(memap_folder, 'B14R9_raw.npy'), data)

data_ecog = data[:64, :]
data_probe = data[64:, :]

# High-pass filter the probe channels, one row at a time, into a memmap
data_probe_hp = pl.memmap(os.path.join(memap_folder, 'data_probe_hp.dat'),
                          dtype='int16',
                          mode='w+',
                          shape=pl.shape(data_probe))
for i in pl.arange(0, pl.shape(data_probe)[0]):
    data_probe_hp[i, :] = filters.high_pass_filter(data_probe[i, :],
                                                   Fsampling=f_sampling,
                                                   Fcutoff=f_hp_cutoff)
    data_probe_hp.flush()
    print(i)
pl.save(os.path.join(memap_folder, 'data_probe_hp.npy'), data_probe_hp)

# Low-pass filter and subsample the ECoG channels. Integer division (//)
# keeps the memmap shape integral.
shape_data_ss = (pl.shape(data_ecog)[0],
                 pl.shape(data_ecog)[1] // int(f_sampling / f_subsample))
data_ecog_lp_ss = pl.memmap(os.path.join(memap_folder, 'data_ecog_lp_ss.dat'),
                            dtype='int16',
                            mode='w+',
                            shape=shape_data_ss)
for i in pl.arange(0, pl.shape(data_ecog)[0]):
    data_ecog_lp_ss[i, :] = signal.decimate(filters.low_pass_filter(data_ecog[i, :],
                                                                    Fsampling=f_sampling,
                                                                    Fcutoff=f_lp_cutoff),
                                            int(f_sampling / f_subsample))
    data_ecog_lp_ss.flush()
    print(i)
pl.save(os.path.join(memap_folder, 'data_ecog_lp_ss.npy'), data_ecog_lp_ss)
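The filters module used above is not shown on this page. As a minimal sketch, and assuming it wraps a zero-phase Butterworth filter from scipy (the order and internals are guesses; only the Fsampling and Fcutoff keyword names come from the calls above), high_pass_filter could look like this:

from scipy import signal

def high_pass_filter(data, Fsampling, Fcutoff, order=4):
    # Cutoff is normalized to the Nyquist frequency; filtfilt gives zero phase lag
    b, a = signal.butter(order, Fcutoff / (Fsampling / 2.0), btype='high')
    return signal.filtfilt(b, a, data)

Worth noting: signal.decimate already applies its own anti-aliasing filter before downsampling, so the explicit low-pass stage above mainly controls the band edge of the subsampled ECoG data.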
Code Example #5
def read_single_csc(fin, assume_same_fs=True, memmap=False):
    """Read a continuous record file. We return the raw packets but, in addition, if we set assume_same_fs as true we
    return a trace with all the data concatenated together, assuming that a constant sampling frequency was maintained
    through out. Gaps in the record are padded with zeros.
    Input:
    fin - file handle
    assume_same_fs - if True, concatenate any segments together, fill time gaps with zeros and return average Fs
    Output:
    Dictionary with fields
      'header' - the file header
      'packets' - the actual packets as read. This is a new pylab dtype with fields:
        'timestamp' - timestamp (us)
        'chan' - channel
        'Fs' - the sampling frequency
        'Ns' - the number of valid samples in the packet
        'samp' - the samples in the packet.
          e.g. x['packets']['samp'] will return a 2D array, number of packets long and 512 wide (since each packet carries 512 wave points)
          similarly x['packets']['timestamp'] will return an array number of packets long
      'Fs': the average frequency computed from the timestamps (can differ from the nominal frequency the device reports)
      'trace': the concatenated data from all the packets
      't0': the timestamp of the first packet.
    NOTE: while 'packets' returns the exact packets read, 'Fs' and 'trace' assume that the record has no gaps and that the
    sampling frequency has not changed during the recording
    """
    hdr = read_header(fin)
    csc_packet = pylab.dtype([
        ('timestamp', 'Q'),  # microseconds
        ('chan', 'I'),
        ('Fs', 'I'),
        ('Ns', 'I'),
        ('samp', '512h')     # 512 int16 samples per packet
    ])

    if not memmap:
        data = pylab.fromfile(fin, dtype=csc_packet, count=-1)
    else:
        data = pylab.memmap(fin, dtype=csc_packet, mode='r', offset=HEADER_BYTE_SIZE)
    Fs = None
    trace = None
    if assume_same_fs:
        if data['Fs'].std() > 1e-6:
            logger.warning('Fs is not fixed across trace, not packing packets together')
            assume_same_fs = False

    if not assume_same_fs or memmap: return {'header': hdr, 'packets': data}

    sample_duration_us = (1. / data['Fs'][0]) * 1e6
    packet_duration_us = 513 * sample_duration_us
    # For the version we are dealing with, Neuralynx packets always carry 512 samples.
    # The nominal Fs in the packet header is a poor estimate if the sampling frequency
    # is low, since it rounds to the nearest Hz, so we compute our own estimate below
    # rather than rely on it. With exactly 512 samples the expected packet duration at
    # high sampling frequencies comes out 1 or 2 us short of the observed dt_us, so we
    # use 513: any genuine pause in the recording is assumed to last longer than 1/Fs.

    samp = data['samp']
    ts_us = data['timestamp']
    dt_us = pylab.diff(ts_us).astype('f')
    idx = pylab.nonzero(dt_us > packet_duration_us)[0]  # Packets after which the recording was paused
    if idx.size == 0:  # No padding needed
        trace = samp.ravel()
        Fs = (data['Ns'][:-1] / (dt_us * 1e-6)).mean()
    else:  # We have some padding to do.
        logger.debug('Gaps in record, padding')
        # Our first task is to find all the contiguous sections of data
        idx += 1  # Shift indexes to point at the packets that come right after a gap
        idx = pylab.insert(idx, 0, 0)  # Now idx holds the index of every packet that starts a contiguous section
        idx = pylab.append(idx, ts_us.size)  # ...plus one past the last packet, so sections are idx[n]:idx[n+1]
        Ns = data['Ns']
        estimFs_sum = 0
        N_samps = 0
        sections = []
        for n in range(idx.size - 1):  # Collect all the sections
            n0, n1 = idx[n], idx[n + 1]
            sections.append(samp[n0:n1].ravel())
            if n1 - n0 > 1:  # We need more than one packet in a section to get a rate estimate
                estimFs_sum += (Ns[n0:n1 - 1] / (dt_us[n0:n1 - 1] * 1e-6)).sum()
                N_samps += n1 - 1 - n0

        Fs = estimFs_sum / float(N_samps)
        # Now pad the data appropriately
        padded = [sections[0]]
        cum_N = sections[0].size
        for n in range(1, len(sections)):
            # Figure out how many zeros to pad so each section starts at the right sample
            Npad = int((ts_us[idx[n]] - ts_us[0]) * 1e-6 * Fs - cum_N)
            padded.append(pylab.zeros(Npad))
            padded.append(sections[n])
            cum_N += Npad + sections[n].size
        trace = pylab.concatenate(padded)

    return {'header': hdr, 'packets': data, 'Fs': Fs, 'trace': trace, 't0': ts_us[0]}
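Each csc_packet record is a fixed 1044 bytes (8 + 4 + 4 + 4 + 512 x 2), which gives a cheap way to sanity-check a file before memmapping it. The helper below is an illustration added here, not part of the original lynxio.py; header_bytes stands in for the module's HEADER_BYTE_SIZE.

import os
import pylab

csc_packet = pylab.dtype([('timestamp', 'Q'), ('chan', 'I'), ('Fs', 'I'),
                          ('Ns', 'I'), ('samp', '512h')])
assert csc_packet.itemsize == 1044  # 8 + 4 + 4 + 4 + 512*2

def looks_like_csc(fname, header_bytes):
    # Hypothetical helper: the payload after the header should be a whole number of packets
    payload = os.path.getsize(fname) - header_bytes
    return payload >= 0 and payload % csc_packet.itemsize == 0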