def from_pandas(df):

    """
    desc:
        Converts a pandas DataFrame to a DataMatrix.

    arguments:
        df:
            type: DataFrame

    returns:
        type: DataMatrix
    """

    import keyword
    from datamatrix import operations as ops

    dm = DataMatrix(length=len(df))
    for colname in df.columns:
        # Multi-index columns arrive as tuples; flatten them with underscores
        if isinstance(colname, tuple):
            _colname = u'_'.join([str(i) for i in colname])
        else:
            _colname = colname
        # Column names must be valid, non-keyword Python identifiers; invalid
        # names are prefixed with an underscore. This replaces the original
        # exec('%s = None' % _colname) probe, which was slow and allowed
        # arbitrary code execution for hostile column names.
        if isinstance(_colname, str) and _colname.isidentifier() \
                and not keyword.iskeyword(_colname):
            dm[_colname] = df[colname]
        else:
            dm[u'_%s' % _colname] = df[colname]
    ops.auto_type(dm)
    return dm
def test_auto_type():

    """Check that auto_type() assigns the expected column types.

    Uses plain ``assert`` instead of the deprecated nose ``ok_`` helper.
    """

    dm = DataMatrix(length=2)
    dm.a = 'a', 1    # mixed str/int -> MixedColumn
    dm.b = 0.1, 1    # contains a float -> FloatColumn
    dm.c = 0, 1      # all int -> IntColumn
    ops.auto_type(dm)
    assert isinstance(dm.a, MixedColumn)
    assert isinstance(dm.b, FloatColumn)
    assert isinstance(dm.c, IntColumn)
def get_trace_data(sub, run, tracename):

    """A generic function to get a simple trace."""

    def zero_to_nan(sample):
        # Zero samples mark missing data; turn them into nan so that they
        # can be interpolated below
        return np.nan if sample == 0 else sample

    dm = ops.auto_type(io.readtxt(TRACE_SRC.format(sub=sub, run=run)))
    dm[tracename] @= zero_to_nan
    # z-score, interpolate the nan gaps, then filter and deconvolve
    dm[tracename] = srs._interpolate(ops.z(dm[tracename]))
    return deconv_hrf(flt(dm[tracename]))
def from_pandas(df):

    """
    desc: |
        Converts a pandas DataFrame to a DataMatrix.

        __Example:__

        %--
        python: |
         import pandas as pd
         from datamatrix import convert

         df = pd.DataFrame( {'col' : [1,2,3] } )
         dm = convert.from_pandas(df)
         print(dm)
        --%

    arguments:
        df:
            type: DataFrame

    returns:
        type: DataMatrix
    """

    import keyword
    from datamatrix import operations as ops

    dm = DataMatrix(length=len(df))
    # A Series has no columns; store it as a single column called 'series'
    if isinstance(df, pd.Series):
        dm.series = df
        return dm
    for colname in df.columns:
        # Multi-index columns arrive as tuples; flatten them with underscores
        if isinstance(colname, tuple):
            _colname = u'_'.join([str(i) for i in colname])
        else:
            _colname = colname
        # Column names must be valid, non-keyword Python identifiers; invalid
        # names are prefixed with an underscore. This replaces the original
        # exec('%s = None' % _colname) probe, which was slow and allowed
        # arbitrary code execution for hostile column names.
        if isinstance(_colname, str) and _colname.isidentifier() \
                and not keyword.iskeyword(_colname):
            dm[_colname] = df[colname]
        else:
            dm[u'_%s' % _colname] = df[colname]
    ops.auto_type(dm)
    return dm
def test_auto_type():

    """Check that auto_type() assigns the expected column types."""

    dm = DataMatrix(length=2)
    dm.a = 'a', 1    # mixed str/int
    dm.b = 0.1, 1    # contains a float
    dm.c = 0, 1      # all int
    dm = ops.auto_type(dm)
    expected = (
        ('a', MixedColumn),
        ('b', FloatColumn),
        ('c', IntColumn),
    )
    for name, coltype in expected:
        assert isinstance(dm[name], coltype)
def get_pupil_data(sub, run):

    """Get a preprocessed pupil trace for one subject and one avmovie run"""

    dm = io.readtxt(TRACE_SRC.format(sub=sub, run=run))
    dm = ops.auto_type(dm)
    # Zero samples mark missing data; turn them into nan, then z-score and
    # interpolate the gaps
    dm.pupil_size @= lambda i: np.nan if i == 0 else i
    dm.luminance @= lambda i: np.nan if i == 0 else i
    dm.pupil_size = srs._interpolate(ops.z(dm.pupil_size))
    dm.luminance = srs._interpolate(ops.z(dm.luminance))
    if REMOVE_LUMINANCE_FROM_PUPIL:
        # Regress pupil size onto luminance (quadratic fit) and subtract the
        # predicted component. np.polyfit(x, y, deg) fits y as a function of
        # x and returns coefficients highest-degree first; the original call
        # passed (pupil_size, luminance) — the reverse of the evaluation
        # below — and unpacked the coefficients as if the intercept came
        # first, so both orders are corrected here.
        s2, s1, i = np.polyfit(dm.luminance, dm.pupil_size, deg=2)
        dm.pupil_size -= i + s1 * dm.luminance + s2 * dm.luminance ** 2
    return deconv_hrf(flt(dm.pupil_size))
def __init__(self, folder=u'data', ext=u'.asc', downsample=None,
             maxtracelen=None):

    """
    desc:
        Constructor. Parses all data files in `folder` and merges them into
        a single DataMatrix (`self.dm`).

    keywords:
        folder:
            type: str
            desc: The folder containing data files
        ext:
            type: str
            desc: The data-file extension
        downsample:
            type: [int, None]
            desc: Indicates whether traces (if any) should be downsampled.
                  For example, a value of 10 means a sample is retained at
                  most once every 10 ms (but less if the sampling rate is
                  less to begin with).
        maxtracelen:
            type: [int, None]
            desc: A maximum length for traces. Longer traces are truncated
                  and a UserWarning is emitted.
    """

    self.dm = DataMatrix()
    self._downsample = downsample
    self._lastsampletime = None
    self._maxtracelen = maxtracelen
    # Parse every file with a matching extension, in alphabetical order,
    # and stack the per-file DataMatrix objects vertically
    for fname in sorted(os.listdir(folder)):
        if not fname.endswith(ext):
            continue
        path = os.path.join(folder, fname)
        self.dm <<= self.parse_file(path)
    # Convert columns to int/float columns where possible
    operations.auto_type(self.dm)
def __init__(
        self,
        folder=u'data',
        ext=(u'.asc', u'.edf', u'.tar.xz'),
        downsample=None,
        maxtracelen=None,
        traceprocessor=None,
        phasefilter=None,
        edf2asc_binary=u'edf2asc',
        multiprocess=False
):

    """
    desc:
        Constructor.

    keywords:
        folder:
            type: str
            desc: The folder containing data files
        ext:
            type: str, tuple
            desc: The data-file extension, or tuple of extensions.
        downsample:
            type: [int, None]
            desc: >
                Indicates whether traces (if any) should be downsampled. For
                example, a value of 10 means that the signal becomes 10
                times shorter. Downsample creates a simple traceprocessor,
                and can therefore not be used in combination with the
                traceprocessor argument.
        maxtracelen:
            type: [int, None]
            desc: A maximum length for traces. Longer traces are truncated
                  and a UserWarning is emitted. This length refers to the
                  trace after processing.
        traceprocessor:
            type: [callable, None]
            desc: >
                A function that is applied to each trace before the trace is
                written to the SeriesColumn. This can be used to apply a
                series of operations that are best done on the raw signal,
                such as first correcting blinks and then downsampling the
                signal. The function must accept two arguments: first a
                label for the trace, which is 'pupil', 'xcoor', 'ycoor', or
                'time'. This allows the function to distinguish the
                different kinds of signals; second, the trace itself. See
                `eyelinkparser.defaulttraceprocessor` for a convenience
                function that applies blink correction and downsampling.
        phasefilter:
            type: [callable,None]
            desc: >
                A function that receives a phase name as argument, and
                returns a bool indicating whether that phase should be
                retained.
        edf2asc_binary:
            type: str
            desc: >
                The name of the edf2asc executable, which if available can
                be used to automatically convert edf files to asc.
        multiprocess:
            type: [bool, int, None]
            desc: >
                Indicates whether each file should be processed in a
                different process. This can speed up parsing considerably.
                If it's not False, it should be an int to indicate the
                number of processes, or None to indicate that the number of
                processes should be the same as the number of cores.
    """

    self.dm = DataMatrix()
    if downsample is not None:
        # downsample is implemented as a canned traceprocessor, so the two
        # keywords are mutually exclusive
        if traceprocessor is not None:
            raise ValueError(
                'You can specify a downsampling rate or traceprocessor, but not both')
        traceprocessor = defaulttraceprocessor(downsample=downsample)
    self._maxtracelen = maxtracelen
    self._traceprocessor = traceprocessor
    self._phasefilter = phasefilter
    self._edf2asc_binary = edf2asc_binary
    # Get a list of input files. First, only files in the data folder that
    # match any of the extensions. Then, these files are passed to the
    # converter which may return multiple files, for example if they have
    # been compressed. The result is a list of iterators, which is chained
    # into a single iterator.
    # NOTE: `basestring` is a py2/3 compat name — presumably provided by a
    # compat import elsewhere in this module.
    input_files = itertools.chain(*(
        self.convert_file(os.path.join(folder, fname))
        for fname in sorted(os.listdir(folder))
        if (
            fname.lower().endswith(ext.lower())
            if isinstance(ext, basestring)
            else any(fname.lower().endswith(e.lower()) for e in ext)
        )
    ))
    if multiprocess:
        # Import lazily so that multiprocessing is only a requirement when
        # this feature is actually used
        import multiprocessing as mp
        # mp.Pool(None) uses one process per core
        with mp.Pool(multiprocess) as p:
            filedms = p.map(self.parse_file, input_files)
            # pop() releases each per-file DataMatrix as soon as it has
            # been merged, keeping peak memory down
            while filedms:
                self.dm <<= filedms.pop()
    else:
        for fname in input_files:
            self.dm <<= self.parse_file(fname)
    # Convert columns to int/float columns where possible
    operations.auto_type(self.dm)