Exemple #1
0
def events(vhdr_path=None):
    """
    Load the markers referenced by a Brain Vision header file as a dataset.

    Parameters
    ----------
    vhdr_path : str | None
        Path to the header file. With ``None``, the path is requested through
        ``ui.ask_file``; if that yields no path, ``None`` is returned.

    Returns
    -------
    ds : dataset | None
        Dataset named after the header file (basename without extension) with
        the columns ``'Mk'``, ``'event_type'``, ``'event_ID'``, ``'i_start'``,
        ``'points'`` and ``'channel'``. The header object and its sampling
        rate are stored in ``ds.info`` under ``'hdr'`` and ``'samplingrate'``.

    Raises
    ------
    IOError
        If the header does not reference a marker file.
    NotImplementedError
        If the header's ``DataType`` is ``'FREQUENCYDOMAIN'``.
    """
    if vhdr_path is None:
        vhdr_path = ui.ask_file("Pick a Brain Vision EEG Header File",
                                "Pick a Brain Vision EEG Header File",
                                ext=[('vhdr', 'Brain Vision Header File')])
        if not vhdr_path:
            return

    hdr = vhdr(vhdr_path)
    if hdr.markerfile is None:
        raise IOError("No marker file referenced in %r" % vhdr_path)
    elif hdr.DataType == 'FREQUENCYDOMAIN':
        raise NotImplementedError

    # read the whole marker file, closing the handle deterministically
    with open(hdr.markerfile) as marker_file:
        txt = marker_file.read()

    # one row per regex match, one column per regex group (all strings)
    m = np.array(marker_re.findall(txt))

    # splitext (not split): os.path.split() on a bare basename always returns
    # an empty head, which would leave the dataset unnamed
    name, _ = os.path.splitext(os.path.basename(hdr.path))
    ds = dataset(name=name)
    ds['Mk'] = var(np.array(m[:, 0], dtype=int))
    ds['event_type'] = factor(m[:, 1])
    # match column 2 is not exported
    ds['event_ID'] = var(np.array(m[:, 3], dtype=int))
    ds['i_start'] = var(np.array(m[:, 4], dtype=int))
    ds['points'] = var(np.array(m[:, 5], dtype=int))
    ds['channel'] = var(np.array(m[:, 6], dtype=int))

    ds.info['hdr'] = hdr
    ds.info['samplingrate'] = hdr.samplingrate
    return ds
Exemple #2
0
 def export_durs(self):
     """
     Return a dataset of word pairs and the durations of their two members.

     Entries of ``self.words`` equal to ``'sp'`` are removed, together with
     the entries of ``self.word_durs`` at the same positions. The remaining
     entries are paired up (positions 0+1, 2+3, ...): ``'words'`` holds the
     lower-cased concatenation of each pair, ``'c1_dur'`` the durations at
     the even positions and ``'c2_dur'`` those at the odd positions.
     """
     out = dataset()

     # mask selecting everything that is not an 'sp' entry
     keep = ~(self.words == 'sp')
     spoken = self.words[keep]
     spoken_durs = self.word_durs[keep]

     labels = []
     for left, right in zip(spoken[::2], spoken[1::2]):
         labels.append(left.lower() + right.lower())

     out['words'] = factor(labels)
     out['c1_dur'] = var(spoken_durs[::2])
     out['c2_dur'] = var(spoken_durs[1::2])
     return out
Exemple #3
0
 def export_durs(self):
     """
     Build a dataset with one row per pair of consecutive non-``'sp'`` words.

     ``'words'`` is the lower-cased first word followed by the lower-cased
     second word of each pair; ``'c1_dur'`` and ``'c2_dur'`` are the
     ``self.word_durs`` values of the first and second pair member.
     """
     ds = dataset()

     is_sp = self.words == 'sp'
     kept_words, kept_durs = self.words[~is_sp], self.word_durs[~is_sp]

     # even positions are the first pair members, odd positions the second
     firsts, seconds = kept_words[::2], kept_words[1::2]
     combined = [a.lower() + b.lower() for a, b in zip(firsts, seconds)]

     ds['words'] = factor(combined)
     ds['c1_dur'] = var(kept_durs[::2])
     ds['c2_dur'] = var(kept_durs[1::2])
     return ds
Exemple #4
0
    def get_dataset(self):
        "Return a dataset holding Y plus one column for every covariate"
        responses, covariates = self._collect_data()

        # a single key sequence fixes the row order for Y and all covariates
        keys = responses.keys()
        vessel = self._get_vessel_for_Y([responses[key] for key in keys])
        ds = _vsl.dataset(vessel)

        # one column per covariate: factor if it has a dictionary, else var
        for cov, value_by_key in covariates.iteritems():
            values = [value_by_key[key] for key in keys]

            if not cov.dict_enabled:
                column = _vsl.var(values, name=cov.name)
            else:
                column = _vsl.factor(values, name=cov.name,
                                     random=cov.random,
                                     labels=cov.dictionary,
                                     colors=cov._color_dict)

            ds.add(column)

        return ds
Exemple #5
0
def tsv(path=None, names=True, types='auto', empty='nan', delimiter=None,
        skiprows=0):
    """
    Return a ``dataset`` with data from a tab-separated values file.


    Parameters
    ----------

    path : str | None
        Path of the file to read. With ``None``, the path is requested
        through ``ui.ask_file``; if that yields no path, ``None`` is
        returned.
    names : list of str | bool
        * ``['name1', ...]`` use these names
        * ``True``: look for names on the first line of the file
        * ``False``: use "v0", "v1", ...
    types : 'auto' | list of int
        * ``'auto'`` -> import as var if all values can be converted float,
          otherwise as factor
        * list of 0=auto, 1=factor, 2=var. e.g. ``[0,1,1,0]``. A column
          requested as var is still imported as factor when its values
          cannot be converted to float. The list is not modified.

        A column in which any cell starts with a quote character (``'`` or
        ``"``) is always imported as factor.
    empty :
        value to substitute for empty cells
    delimiter : str
        value delimiting cells in the input file (None = any whitespace;
        e.g., ``'\\t'``)
    skiprows : int
        Skip so many rows at the beginning of the file (for tsv files with
        headers). Column names (if names==True) are expected to come after
        the skipped rows.

    Returns
    -------
    ds : dataset | None
        Dataset named after the file's basename, with one var or factor per
        column.

    Raises
    ------
    ValueError
        If the number of names matches neither the number of cells in the
        first data row nor that number minus one (in the latter case the
        first cell of every row is dropped), or if ``types`` is a list whose
        length differs from the number of names.

    """
    if path is None:
        path = ui.ask_file("Select file to import as dataframe",
                           "Select file to import as dataframe")
        if not path:
            return

    with open(path) as f:
        for _ in range(skiprows):
            f.readline()

        # read / create names
        # ``== True`` rather than ``is True`` so that ``names=1`` keeps
        # meaning "read the names from the file"
        if names == True:  # noqa: E712
            names = f.readline().split(delimiter)
            names = [n.strip().strip('"') for n in names]

        # one list of stripped cells per row; blank cells become ``empty``
        lines = [[cell.strip() or empty for cell in line.split(delimiter)]
                 for line in f]

    # NOTE: a file without any data rows raises IndexError here
    n_vars = len(lines[0])

    if not names:
        names = ['v%i' % i for i in range(n_vars)]

    n = len(names)
    # decide whether to drop first column
    if n_vars == n:
        start = 0
    elif n_vars == n + 1:
        start = 1
    else:
        raise ValueError("number of header different from number of data")

    if types in ['auto', None, False, True]:
        types = [0] * n
    else:
        if len(types) != n:
            raise ValueError("types has %i entries but there are %i columns"
                             % (len(types), n))
        # work on a copy: quoted cells overwrite entries below and the
        # caller's list must not change
        types = list(types)

    # sort the cells into one list per column
    data = [[] for _ in names]
    for line in lines:
        for i, v in enumerate(line[start:]):
            for str_del in ("'", '"'):
                # v[:1] instead of v[0]: stripping one kind of quote can
                # leave an empty string (cell ``''``)
                if v[:1] == str_del:
                    v = v.strip(str_del)
                    types[i] = 1
            data[i].append(v)

    ds = _data.dataset(name=os.path.basename(path))

    for name, values, force_type in zip(names, data, types):
        v = np.array(values)
        as_var = force_type in (0, 2)
        if as_var:
            try:
                v = v.astype(float)
            except (ValueError, TypeError):
                # not numeric after all -> fall back to a factor
                as_var = False

        if as_var:
            column = _data.var(v, name=name)
        else:
            column = _data.factor(v, name=name)
        ds.add(column)

    return ds