def events(vhdr_path=None):
    """Read the markers of a Brain Vision recording into a dataset.

    Parameters
    ----------
    vhdr_path : str | None
        Path of the Brain Vision header file. If None, a file dialog is
        opened to ask for the file.

    Returns
    -------
    ds : dataset | None
        None if the file dialog returned no path. Otherwise a dataset named
        after the header file (base name without extension) with one case
        per match of ``marker_re`` in the marker file:

        * ``'Mk'``: match group 0 (int)
        * ``'event_type'``: match group 1 (factor)
        * ``'event_ID'``: match group 3 (int)
        * ``'i_start'``: match group 4 (int)
        * ``'points'``: match group 5 (int)
        * ``'channel'``: match group 6 (int)

        ``ds.info`` holds the header object (``'hdr'``) and its sampling
        rate (``'samplingrate'``).

    Raises
    ------
    IOError
        If the header does not reference a marker file.
    NotImplementedError
        If the header's DataType is ``'FREQUENCYDOMAIN'``.
    """
    if vhdr_path is None:
        vhdr_path = ui.ask_file("Pick a Brain Vision EEG Header File",
                                "Pick a Brain Vision EEG Header File",
                                ext=[('vhdr', 'Brain Vision Header File')])
        if not vhdr_path:
            return

    hdr = vhdr(vhdr_path)
    if hdr.markerfile is None:
        raise IOError("No marker file referenced in %r" % vhdr_path)
    elif hdr.DataType == 'FREQUENCYDOMAIN':
        raise NotImplementedError

    # read the whole marker file, making sure the handle is closed again
    with open(hdr.markerfile) as fid:
        txt = fid.read()

    # one row per marker, one column per regular expression group
    m = np.array(marker_re.findall(txt))

    # splitext (not split) so that the name is the file name without its
    # extension; os.path.split() on a bare file name yields an empty head
    name, _ = os.path.splitext(os.path.basename(hdr.path))
    ds = dataset(name=name)
    ds['Mk'] = var(np.array(m[:, 0], dtype=int))
    ds['event_type'] = factor(m[:, 1])
    # NOTE(review): group 2 of marker_re is not exported
    ds['event_ID'] = var(np.array(m[:, 3], dtype=int))
    ds['i_start'] = var(np.array(m[:, 4], dtype=int))
    ds['points'] = var(np.array(m[:, 5], dtype=int))
    ds['channel'] = var(np.array(m[:, 6], dtype=int))

    ds.info['hdr'] = hdr
    ds.info['samplingrate'] = hdr.samplingrate
    return ds
def export_durs(self):
    """Export word-pair labels and their two durations as a dataset.

    Entries of ``self.words`` equal to ``'sp'`` are dropped (together with
    the matching entries of ``self.word_durs``). The remaining words are
    taken in consecutive pairs.

    Returns
    -------
    ds : dataset
        ``'words'`` is a factor with the lower-cased concatenation of each
        pair, ``'c1_dur'`` holds the durations at even positions and
        ``'c2_dur'`` the durations at odd positions.
    """
    out = dataset()

    # boolean mask of the entries that are kept (everything but 'sp')
    keep = ~(self.words == 'sp')
    kept_words = self.words[keep]
    kept_durs = self.word_durs[keep]

    pair_labels = []
    for c1, c2 in zip(kept_words[::2], kept_words[1::2]):
        pair_labels.append(c1.lower() + c2.lower())

    out['words'] = factor(pair_labels)
    out['c1_dur'] = var(kept_durs[::2])
    out['c2_dur'] = var(kept_durs[1::2])
    return out
def export_durs(self):
    """Build a dataset of word pairs with the duration of each member.

    All entries where ``self.words`` equals ``'sp'`` are excluded from both
    ``self.words`` and ``self.word_durs`` before pairing. Items at even
    positions are the first member of a pair, items at odd positions the
    second.

    Returns
    -------
    ds : dataset
        Contains the factor ``'words'`` (both members lower-cased and
        joined) and the variables ``'c1_dur'`` and ``'c2_dur'`` (duration
        of the first and second member, respectively).
    """
    result = dataset()

    not_sp = ~(self.words == 'sp')
    tokens = self.words[not_sp]
    token_durs = self.word_durs[not_sp]

    firsts, seconds = tokens[::2], tokens[1::2]
    joined = [a.lower() + b.lower() for a, b in zip(firsts, seconds)]

    result['words'] = factor(joined)
    result['c1_dur'] = var(token_durs[::2])
    result['c2_dur'] = var(token_durs[1::2])
    return result
def get_dataset(self):
    """Return a dataset containing Y and the covariates.

    The data is obtained from ``self._collect_data()``, which yields a
    mapping of Y values and a mapping of covariates, both keyed by the same
    indexes. The Y values are wrapped by ``self._get_vessel_for_Y`` and
    used to create the dataset; each covariate is then added as a factor
    (if its ``dict_enabled`` attribute is true) or as a var.
    """
    y_by_index, covariates = self._collect_data()
    indexes = y_by_index.keys()

    y_values = [y_by_index[i] for i in indexes]
    ds = _vsl.dataset(self._get_vessel_for_Y(y_values))

    # add one data object per covariate, in the same index order as Y
    for cov, value_by_index in covariates.iteritems():
        column = [value_by_index[i] for i in indexes]
        if not cov.dict_enabled:
            item = _vsl.var(column, name=cov.name)
        else:
            item = _vsl.factor(column, name=cov.name, random=cov.random,
                               labels=cov.dictionary,
                               colors=cov._color_dict)
        ds.add(item)

    return ds
def tsv(path=None, names=True, types='auto', empty='nan', delimiter=None,
        skiprows=0):
    """
    returns a ``dataset`` with data from a tab-separated values file.

    Parameters
    ----------
    path : str | None
        Path of the file to read. If None, a file dialog is opened to ask
        for the file; if that dialog returns no path, None is returned.
    names : list of str | bool
        * ``['name1', ...]`` use these names
        * ``True``: look for names on the first line of the file
        * ``False``: use "v0", "v1", ...
    types : 'auto' | list of int
        * ``'auto'`` -> import as var if all values can be converted float,
          otherwise as factor
        * list of 0=auto, 1=factor, 2=var. e.g. ``[0,1,1,0]``. A column
          containing a quoted cell is always imported as factor. Type 2 is
          currently handled like 0 (fall back to factor if the values can
          not be converted to float). The list is not modified.
    empty :
        value to substitute for empty cells
    delimiter : str
        value delimiting cells in the input file (None = any whitespace;
        e.g., ``'\\t'``)
    skiprows : int
        Skip so many rows at the beginning of the file (for tsv files with
        headers). Column names (if names==True) are expected to come after
        the skipped rows.

    Raises
    ------
    ValueError
        If the file contains no data rows, if the number of names matches
        neither the number of data columns nor that number minus one, or if
        ``types`` is a list whose length differs from the number of names.
    """
    if path is None:
        path = ui.ask_file("Select file to import as dataframe",
                           "Select file to import as dataframe")
        if not path:
            return

    with open(path) as fid:
        for _ in range(skiprows):
            fid.readline()

        # read / create names
        if names is True:
            names = fid.readline().split(delimiter)
            names = [n.strip().strip('"') for n in names]

        # read all remaining rows, substituting empty cells
        rows = []
        for line in fid:
            cells = []
            for v in line.split(delimiter):
                v = v.strip()
                if not v:
                    v = empty
                cells.append(v)
            rows.append(cells)

    if not rows:
        raise ValueError("no data rows found in %r" % path)

    n_vars = len(rows[0])
    if not names:
        names = ['v%i' % i for i in range(n_vars)]
    n = len(names)

    # decide whether to drop first column (one more data column than names)
    if n_vars == n:
        start = 0
    elif n_vars == n + 1:
        start = 1
    else:
        raise ValueError("number of header different from number of data")

    if types in ['auto', None, False, True]:
        types = [0] * n
    elif len(types) != n:
        raise ValueError("types has %i entries but there are %i names"
                         % (len(types), n))
    else:
        # work on a copy: quoted cells change entries below and the
        # caller's list must stay untouched
        types = list(types)

    # sort the cells into columns
    data = [[] for _ in range(n)]
    for row in rows:
        for i, v in enumerate(row[start:]):
            for quote in ("'", '"'):
                # startswith() instead of v[0] so that empty strings (e.g.
                # a quoted empty cell) do not raise; the isinstance check
                # skips substituted ``empty`` values that are not strings
                if isinstance(v, str) and v.startswith(quote):
                    v = v.strip(quote)
                    types[i] = 1  # quoted values force a factor
            data[i].append(v)

    ds = _data.dataset(name=os.path.basename(path))
    for name, values, force_type in zip(names, data, types):
        v = np.array(values)
        if force_type in [0, 2]:
            try:
                v = v.astype(float)
            except (ValueError, TypeError):
                # not all values are numeric
                item = _data.factor(v, name=name)
            else:
                item = _data.var(v, name=name)
        else:
            item = _data.factor(v, name=name)
        ds.add(item)

    return ds