Python factorの例、data.factor Pythonの例

コード例 #1

0

ファイルを表示

ファイル: design.py プロジェクト: teonbrooks/Eelbrain

def add_missing(base, name='missing', values=None):
    """
    Return a factor holding, for each case, the value missing from ``base``.

    Parameters
    ----------
    base : list of factors
        Factors that together, on each case, contain all the values spare
        one. Every factor is compared element-wise against each candidate
        value.
    name : str
        Name given to the returned factor.
    values : list of str | None
        Candidate values for the factor. If None, the first factor's
        ``cells`` are used.

    Returns
    -------
    factor
        Factor with ``len(base[0])`` cases, created from ``'?' * N`` and then
        overwritten, value by value, on the cases where none of the ``base``
        factors equals that value. When more than one value is absent on a
        case, the value that comes later in ``values`` is assigned last.
    """
    N = len(base[0])
    if values is None:
        values = base[0].cells

    # Materialise once: the candidates are counted and walked twice below.
    labels = list(values)

    # grid[:, i] is True on the cases where no base factor equals labels[i]
    grid = np.empty((N, len(labels)), dtype=bool)
    for i, label in enumerate(labels):
        grid[:, i] = np.all([f != label for f in base], axis=0)

    out = _data.factor('?' * N, name=name)
    # enumerate() instead of dict.iteritems(), which only exists on Python 2
    for i, label in enumerate(labels):
        out[grid[:, i]] = label

    return out

コード例 #2

0

ファイルを表示

ファイル: design.py プロジェクト: teonbrooks/Eelbrain

def get_permutated_dataset(variables, count='caseID', randomize=False):
    """
    Build a dataset with one case per combination of the variables' values.

    Parameters
    ----------
    variables : sequence of design variables
        Each item is read for ``is_rand``, ``Ndraw``, ``N``, ``urn``,
        ``name``, ``cells`` and ``ID``, and gets ``_set_list_ID(i)`` called
        with its position in the sequence.
    count : str | None
        If truthy, a var with this name containing ``np.arange(ds.N)`` is
        added to the dataset.
    randomize : bool
        If True, rows are shuffled within consecutive bins whose length is
        the number of trials divided by the product of ``Ndraw`` over the
        variables whose ``is_rand`` is false.

    Returns
    -------
    dataset
        Dataset named 'Design' holding one factor per variable (and the
        count var, if requested).
    """
    # Only the non-randomized variables are needed later (for the bin size).
    perm_nonrand = [v for v in variables if not v.is_rand]

    # set the variables IDs
    for i, v in enumerate(variables):
        v._set_list_ID(i)

    perm_n = [v.Ndraw for v in variables]
    # np.prod() of an empty sequence is the float 1.0; array shapes and
    # tile/repeat counts have to be integers, so cast every product.
    n_trials = int(np.prod(perm_n))
    n_properties = len(variables)
    # NOTE(review): uint8 codes limit each variable to 256 distinct values.
    out = np.empty((n_trials, n_properties), dtype=np.uint8)

    # permutate variables: column i cycles through the codes of variable i
    for i, v in enumerate(variables):
        r = int(np.prod(perm_n[i + 1:]))  # repeats of each entry
        if len(v.urn) == 0:
            t = int(np.prod(perm_n[:i]))  # tilings of the whole code range
            out[:, i] = np.tile(np.arange(v.N), t).repeat(r)
        else:
            base = np.arange(v.N)
            for v0 in variables[:i]:
                if v0 in v.urn:
                    # for every code j, keep the entries that differ from j
                    base = np.ravel([base[base != j] for j in range(v.N)])
                else:
                    # NOTE(review): tiles by v.Ndraw, not v0.Ndraw -- confirm
                    # this is intended.
                    base = np.tile(base, v.Ndraw)

            out[:, i] = np.repeat(base, r)

    if randomize:
        # shuffle those perm factors that should be shuffled
        n_rand_bins = int(np.prod([v.Ndraw for v in perm_nonrand]))
        rand_bin_len = n_trials // n_rand_bins
        for i in range(0, n_trials, rand_bin_len):
            # shuffles the rows of this slice of ``out`` in place
            np.random.shuffle(out[i:i + rand_bin_len])

    # create dataset
    ds = _data.dataset(name='Design')
    for v in variables:
        x = out[:, v.ID]
        f = _data.factor(x, v.name, labels=v.cells)
        ds.add(f)

    if count:
        ds.add(_data.var(np.arange(ds.N), count))

    return ds

コード例 #3

0

ファイルを表示

ファイル: load.py プロジェクト: kriek197/Eelbrain

def fiff_event_file(path, labels={}):
    """
    Read an event file into a dataset.

    The array returned by ``mne.read_events(path)`` is reshaped to six
    columns. Columns 1 and 5, and columns 2 and 4, are asserted to be equal.

    path : str
        path to the event file; its basename becomes the dataset name
    labels : dict
        if non-empty, an 'event' factor built from column 2 with these
        labels is added to the dataset

    Returns a dataset with the vars 'eventID' (column 2), 'i_start'
    (column 0) and 'i_stop' (column 3).
    """
    table = mne.read_events(path).reshape((-1, 6))
    # one 1-d view per column of the six-column table
    col_start, col_1, col_id, col_stop, col_4, col_5 = table.T
    ds_name = os.path.basename(path)

    assert all(col_1 == col_5)
    assert all(col_id == col_4)

    start_var = _data.var(col_start, name='i_start')
    stop_var = _data.var(col_stop, name='i_stop')
    id_var = _data.var(col_id, name='eventID')
    result = _data.dataset(id_var, start_var, stop_var, name=ds_name)

    if not labels:
        return result

    event_factor = _data.factor(col_id, name='event', labels=labels)
    result.add(event_factor)
    return result

コード例 #4

0

ファイルを表示

ファイル: VaR.py プロジェクト: Niyu-Jia/Quant-Analysis

def VaR(w, portfolio, factor_list, N):
    """
    Estimate a portfolio's 5% value-at-risk from simulated factor returns.

    w: weights of the portfolio, one per stock; rescaled here to sum to 1
    portfolio: passed with N to regression.regression; the returned object
        supplies ``coef`` (indexed below with one row per stock, the
        intercept in column 0 and the factor betas in the remaining columns)
        and ``simulated_residual``
    factor_list: list of factors which will be used for simulation; input eg:[VIX,EIRX,VOL]
    N: trials of simulation

    Returns the absolute value of the simulated portfolio return found at
    position int(N * 0.05) of the ascending-sorted simulated returns.
    """

    # normalize the weights to sum to 1 (the total is computed only once)
    total_weight = sum(w)
    w = [float(i) / total_weight for i in w]
    all_factor = data.factor(factor_list)
    regressor = regression.regression(portfolio, N)

    # assign the coef and simulated residuals
    coef = regressor.coef
    matrix_residuals = regressor.simulated_residual.T

    # simulate multivariate normal distribution of these factors
    # the multivariate_normal should be a 2-D array of kxN, k is the number
    # of factors
    multivariate_normal = (all_factor.simulation(N)).T

    # stock return = intercept + beta*factor + error
    # assumption: iid error, will be simulated from iid normal
    # directly generate N trials in the matrix
    matrix_intercept = np.array([coef[:, 0]] * N).T
    matrix_beta = coef[:, 1:]

    stock_return = matrix_intercept + np.dot(
        matrix_beta, multivariate_normal) + matrix_residuals

    matrix_w = np.array([w] * N).T
    portfolio_return = np.multiply(matrix_w, stock_return)

    # sum of every weighted stock return to get the portfolio return
    # simulation, one value per trial
    sum_return = np.sum(portfolio_return, axis=0)
    sort_return = np.sort(sum_return)

    # cutoff point: index of the 5% quantile in the sorted returns
    cutoff = int(N * 0.05)
    # use cutoff point's return as the VaR
    var = sort_return[cutoff]
    return abs(var)

コード例 #5

0

ファイルを表示

ファイル: structure.py プロジェクト: teonbrooks/Eelbrain

    def __init__(self, Y, X, match=None, sub=None, match_func=np.mean, ds=None):
        """
        Split the dependent measurement Y into the cells defined by X.

        Parameters
        ----------

        Y : var, ndvar
            dependent measurement
        X : categorial
            factor or interaction. If None, all of Y is stored under the
            single cell ``None``.
        match :
            factor on which cases are matched (i.e. subject for a repeated
            measures comparisons). If several data points with the same
            case fall into one cell of X, they are combined using
            match_func. If match is not None, celltable.groups contains the
            {Xcell -> [match values of data points], ...} mapping corres-
            ponding to self.data
        sub : bool array
            Bool array of length N specifying which cases to include
        match_func : callable
            see match
        ds : dataset
            If a dataset is specified, input items (Y / X / match / sub) can
            be str instead of data-objects, in which case they will be
            retrieved from the dataset.


        Attributes set
        --------------

        X, Y, sub, match :
            the (converted / indexed) arguments
        cells :
            ``X.cells``, or ``[None]`` when X is None
        data : dict
            {cell -> data of Y in that cell}
        data_indexes : dict
            {cell -> index (``X == cell``) of the cases in that cell}
        groups : dict
            {cell -> match values}; only created when match is truthy
        within : dict
            {(cell1, cell2) -> result of comparing the two cells' groups};
            only created when match is truthy and X is not None
        all_within :
            ``np.all`` over the values of ``within``, or False without match.
            Not set when X is None (the constructor returns early).


        Examples
        --------

        Split a repeated-measure variable Y into cells defined by the
        interaction of A and B::

            >>> c = celltable(Y, A % B, match=subject)

        """
        # resolve arguments given as str through the dataset; match is
        # retrieved by item access, the others are evaluated as expressions
        if isinstance(Y, basestring):
            Y = ds.eval(Y)
        if isinstance(X, basestring):
            X = ds.eval(X)
        if isinstance(match, basestring):
            match = ds[match]
        if isinstance(sub, basestring):
            sub = ds.eval(sub)

        # categorial and ndvar Y are only indexed; anything else is converted
        # to a var (asvar receives sub as well)
        if _data.iscategorial(Y) or _data.isndvar(Y):
            if sub is not None:
                Y = Y[sub]
        else:
            Y = _data.asvar(Y, sub)

        if X is not None:
            X = _data.ascategorial(X, sub)

        # NOTE(review): this tests the truthiness of match rather than
        # ``match is not None`` -- confirm how the match object evaluates as
        # a bool.
        if match:
            match = _data.asfactor(match, sub)
            assert len(match) == len(Y)
            self.groups = {}

        # save args
        self.X = X
        self.Y = Y
        self.sub = sub
        self.match = match

        # extract cells and cell data
        self.data = {}
        self.data_indexes = {}
        if X is None:
            # no cell structure: all of Y forms the single cell None
            self.data[None] = Y
            self.data_indexes[None] = np.ones(len(Y), dtype=bool)
            self.cells = [None]
            return

        self.cells = X.cells

        for cell in self.cells:
            self.data_indexes[cell] = cell_index = X == cell
            newdata = Y[cell_index]
            if match:
                group = match[cell_index]
                values = group.cells

                # sort
                if len(values) < len(group):
                    # fewer distinct match values than cases in this cell:
                    # combine the cases that share a value using match_func
                    newdata = newdata.compress(group, func=match_func)
                    group = _data.factor(values, name=group.name)
                else:
                    # NOTE(review): assuming ``group == v`` is a bool array
                    # with one entry per case, the product below broadcasts
                    # arange over the case axis, and the sum over axis 0
                    # then looks like it yields arange(len(group)), i.e. no
                    # reordering. Confirm whether a sum over axis 1 (the
                    # position of each value) was intended.
                    group_ids = [group == v for v in values]
                    sort_arg = np.sum(group_ids * np.arange(len(values)), axis=0)
                    newdata = newdata[sort_arg]
                    group = group[sort_arg]

                self.groups[cell] = group

            self.data[cell] = newdata

        if match:
            # determine which cells compare values for dependent values on
            # match_variable
            #            n_cells = len(self.indexes)
            #            self.within = np.empty((n_cells, n_cells), dtype=bool)
            self.within = {}
            for cell1 in self.cells:
                for cell2 in self.cells:
                    if cell1 == cell2:
                        pass
                    else:
                        # presumably the comparison is either the constant
                        # False or an element-wise result, which is reduced
                        # with all() -- verify against the factor class
                        v = self.groups[cell1] == self.groups[cell2]
                        if v is not False:
                            v = all(v)
                        self.within[cell1, cell2] = v
                        self.within[cell2, cell1] = v
            # NOTE(review): relies on dict.values() returning a list, as on
            # Python 2 (this method also uses ``basestring``).
            self.all_within = np.all(self.within.values())
        else:
            self.all_within = False
コード例 #6

0

ファイルを表示

ファイル: load.py プロジェクト: kriek197/Eelbrain

def fiff(raw, events, conditions, varname='condition', dataname='MEG',
         tstart=-.2, tstop=.6, properties=None, name=None, c_colors={},
         sensorsname='fiff-sensors'):
    """
    Loads data directly when two files (raw and events) are provided
    separately.

    raw : str
        path to the raw file
    events : str
        path to the event file
    conditions : dict
        ID->name dictionary of conditions that should be imported
    varname : str
        variable name that will contain the condition value
    dataname : str
        name of the ndvar that holds the epoch data; also set as the
        dataset's default_DV
    tstart, tstop : scalar
        epoch boundaries handed to mne.Epochs
    properties : dict
        set properties in addition to the defaults
    name : str | None
        name of the dataset; None uses the basename of the raw file path
    c_colors : dict
        passed on as ``colors`` to the condition factor
    sensorsname : str
        name of the sensor net built from the raw file's MEG channels

    Returns a dataset containing the data ndvar and the condition factor.

    Raises ValueError if ``conditions`` is empty.
    """
    # Without any condition no epochs are read, so the sampling rate and
    # time axis needed below would never be defined; fail early and clearly.
    if len(conditions) == 0:
        raise ValueError("conditions is empty; at least one event ID is "
                         "required to load epochs")

    if name is None:
        name = os.path.basename(raw)

    raw = mne.fiff.Raw(raw)

    # parse sensor net: position and name of every channel named 'MEG...'
    sensor_list = []
    for ch in raw.info['chs']:
        ch_name = ch['ch_name']
        if ch_name.startswith('MEG'):
            x, y, z = ch['loc'][:3]
            sensor_list.append([x, y, z, ch_name])
    sensor_net = sensors.sensor_net(sensor_list, name=sensorsname)

    events = mne.read_events(events)
    picks = mne.fiff.pick_types(raw.info, meg=True, eeg=False, stim=False,
                                eog=False, include=[], exclude=[])

    data = []
    c_x = []

    # read the data
    for ID in conditions:
        epochs = mne.Epochs(raw, events, ID, tstart, tstop, picks=picks)
        # sampling rate and time axis are taken from the last condition read
        samplingrate = epochs.info['sfreq'][0]

        # data
        c_data = epochs.get_data()        # n_ep, n_ch, n_t

        # store every epoch transposed, i.e. as (n_t, n_ch)
        data.extend(epoch.T for epoch in c_data)

        T = epochs.times

        # conditions variable: one ID entry per epoch
        n_ep = len(c_data)
        c_x.extend([ID] * n_ep)

    # construct the dataset
    c_factor = _data.factor(c_x, name=varname, labels=conditions,
                            colors=c_colors, retain_label_codes=True)

    # user-supplied properties override the defaults, which in turn
    # override the sampling rate entry
    props = {'samplingrate': samplingrate}
    props.update(_default_fiff_properties)
    if properties is not None:
        props.update(properties)

    data = np.array(data)

    timevar = _data.var(T, 'time')
    dims = (timevar, sensor_net)

    Y = _data.ndvar(dims, data, properties=props, name=dataname)

    dataset = _data.dataset(Y, c_factor, name=name, default_DV=dataname)
    return dataset

コード例 #7

0

ファイルを表示

ファイル: design.py プロジェクト: teonbrooks/Eelbrain

def _try_make_random_factor(name, values, ds, rand, balance, urn, 
                            require_exact_balance):
    """
    Make one attempt at creating a balanced (and optionally random) factor.

    name : str
        name of the returned factor
    values : sequence
        labels of the factor; integer codes index into this sequence
    ds : dataset
        only ``ds.N`` (the number of cases) is read
    rand : bool
        shuffle the order of the values within each region
    balance :
        if truthy, handed to ``_data.interaction``; values are distributed
        separately within each cell of the resulting factor ("region").
        Otherwise all cases form one region.
    urn : sequence
        objects indexable by case; a value is not left on a case where any
        of them already holds that value (requires ``rand``)
    require_exact_balance : bool
        raise instead of dropping surplus values when the region length is
        not a multiple of the number of values

    Returns a factor of length ``ds.N`` with ``values`` as labels.

    Raises
    ------
    NotImplementedError
        if the regions differ in length, or ``urn`` is used without ``rand``
    ValueError
        if exact balance is required but not possible
    RandomizationError
        if a conflict with ``urn`` cannot be resolved by switching two
        values; the caller may simply try again
    """
    N_values = len(values)
    # NOTE(review): uint8 codes cap the number of distinct values at 256.
    x = np.empty(ds.N, dtype=np.uint8)
    cells = dict(enumerate(values))

    if balance:
        groups = _data.interaction(balance)
        regions = groups.as_factor()

        # for now, they have to be of equal length
        region_lens = [np.sum(regions == cell) for cell in regions.cells]
        if len(np.unique(region_lens)) > 1:
            raise NotImplementedError

        region_len = region_lens[0]
    else:
        regions = _data.factor('?' * ds.N, "regions")
        region_len = ds.N

    # Generate codes with an equal number of each value. The modulo is taken
    # on the default integer range and only then narrowed to uint8, so
    # regions longer than 256 cases do not wrap around before the modulo.
    exact_balance = not bool(region_len % N_values)
    if exact_balance:
        codes = (np.arange(region_len) % N_values).astype(np.uint8)
    else:
        if require_exact_balance:
            raise ValueError("No exact balancing possible")
        _len = (region_len // N_values + 1) * N_values
        codes = (np.arange(_len) % N_values).astype(np.uint8)

        # drop trailing values randomly
        if rand:
            np.random.shuffle(codes[-N_values:])
        # Trim to the region length (not ds.N): the codes are assigned to
        # one region at a time below and must match its size.
        codes = codes[:region_len]

    # cycle through values of the balance containers
    for region in regions.cells:
        if rand:
            np.random.shuffle(codes)

        # indexes into the current out array rows
        c_index = (regions == region)
        c_indexes = np.where(c_index)[0]  # location

        if urn:  # the Urn has been drawn from already
            if not rand:
                raise NotImplementedError

            # source and target indexes
            for si, ti in zip(range(region_len), c_indexes):
                if any(cells[codes[si]] == u[ti] for u in urn):

                    # randomized order in which to test other
                    # values for switching (a real list, so it can be
                    # shuffled on Python 2 and 3 alike)
                    switch_order = [j for j in range(region_len) if j != si]
                    np.random.shuffle(switch_order)

                    switched = False
                    for si_switch in switch_order:
                        ti_switch = c_indexes[si_switch]
                        # a / b: would either value clash with the urn at
                        # its new position after the switch?
                        a = any(cells[codes[si]] == u[ti_switch]
                                for u in urn)
                        b = any(cells[codes[si_switch]] == u[ti]
                                for u in urn)
                        if not (a or b):
                            codes[[si, si_switch]] = codes[[si_switch, si]]
                            switched = True
                            break

                    if not switched:
                        msg = "No value found for switching! Try again."
                        raise RandomizationError(msg)

        x[c_index] = codes
    return _data.factor(x, name, labels=cells)