def add_missing(base, name='missing', values=None):
    """
    Return a factor that holds, for each case, the value that none of the
    factors in ``base`` takes on that case.

    base : list of factors
        factors that together, on each case, contain all the values spare
        one.
    name : str
        name of the factor that is returned.
    values : list of str | None
        values for the factor. If None, the first factor's values are used.

    """
    first = base[0]
    n_cases = len(first)
    if values is None:
        values = first.cells

    labels = dict(enumerate(values))

    # is_absent[:, k] is True on the cases where no factor equals labels[k]
    is_absent = np.empty((n_cases, len(values)), dtype=bool)
    for k, label in labels.items():
        differs = [f != label for f in base]
        is_absent[:, k] = np.all(differs, axis=0)

    # start from a placeholder factor and fill in the absent value per case
    out = _data.factor('?' * n_cases, name=name)
    for k in labels:
        out[is_absent[:, k]] = labels[k]
    return out
def get_permutated_dataset(variables, count='caseID', randomize=False):
    """
    Create a dataset that contains the fully crossed design of ``variables``.

    variables : list of design variables
        Each variable has to provide ``is_rand``, ``Ndraw``, ``N``, ``urn``,
        ``name`` and ``cells`` as well as ``_set_list_ID()`` / ``ID``.
    count : str | None
        If not empty, a var with this name that numbers the cases
        (``0 ... ds.N - 1``) is added to the dataset.
    randomize : bool
        Shuffle the rows within consecutive bins of equal length. The number
        of bins is the product of ``Ndraw`` over the variables for which
        ``is_rand`` is false.

    Returns a ``_data.dataset`` named 'Design' with one factor per variable.

    """
    def n_combinations(draws):
        # np.prod([]) is the float 1.0, which np.tile / np.repeat do not
        # accept as a repeat count, so always reduce to a plain int
        return int(np.prod(draws, dtype=int))

    # variables that are permutated but not randomized; they define the
    # bins within which rows are shuffled when randomize is set
    perm_nonrand = [v for v in variables if not v.is_rand]

    # set the variables IDs
    for i, v in enumerate(variables):
        v._set_list_ID(i)

    perm_n = [v.Ndraw for v in variables]
    n_trials = n_combinations(perm_n)
    n_properties = len(variables)
    # NOTE: value codes are stored as uint8
    out = np.empty((n_trials, n_properties), dtype=np.uint8)

    # permutated variables: one column of value codes per variable
    for i, v in enumerate(variables):
        t = n_combinations(perm_n[:i])       # product over preceding variables
        r = n_combinations(perm_n[i + 1:])   # product over following variables
        if len(v.urn) == 0:
            out[:, i] = np.tile(np.arange(v.N), t).repeat(r)
        else:
            base = np.arange(v.N)
            for v0 in variables[:i]:
                if v0 in v.urn:
                    # for every code j keep only the codes different from j
                    base = np.ravel([base[base != j] for j in range(v.N)])
                else:
                    # NOTE(review): tiles by v.Ndraw rather than v0.Ndraw;
                    # kept as in the original -- confirm this is intended
                    base = np.tile(base, v.Ndraw)
            out[:, i] = np.repeat(base, r)

    if randomize:
        # shuffle those perm factors that should be shuffled
        n_rand_bins = n_combinations([v.Ndraw for v in perm_nonrand])
        rand_bin_len = n_trials // n_rand_bins
        for start in range(0, n_trials, rand_bin_len):
            # shuffles the rows of the bin in place (slice is a view)
            np.random.shuffle(out[start:start + rand_bin_len])

    # create dataset
    ds = _data.dataset(name='Design')
    for v in variables:
        ds.add(_data.factor(out[:, v.ID], v.name, labels=v.cells))

    if count:
        ds.add(_data.var(np.arange(ds.N), count))

    return ds
def fiff_event_file(path, labels=None):
    """
    Load an event file into a dataset.

    The events returned by ``mne.read_events`` are regrouped into rows of six
    values. Column 0 becomes the var 'i_start', column 3 the var 'i_stop' and
    column 2 the var 'eventID'.

    path : str
        path to the event file; its basename is used as the dataset name.
    labels : dict | None
        If not empty, a factor 'event' based on the event IDs with these
        labels is added to the dataset.

    Raises ValueError if columns 1 and 5, or columns 2 and 4, of the
    regrouped events differ (or if the events cannot be regrouped into rows
    of six values).

    """
    events = mne.read_events(path).reshape((-1, 6))
    name = os.path.basename(path)

    # explicit checks instead of assert, so they are not stripped under -O
    if not (events[:, 1] == events[:, 5]).all():
        raise ValueError("Event file %r: columns 1 and 5 of the paired "
                         "events differ" % path)
    if not (events[:, 2] == events[:, 4]).all():
        raise ValueError("Event file %r: columns 2 and 4 of the paired "
                         "events differ" % path)

    istart = _data.var(events[:, 0], name='i_start')
    istop = _data.var(events[:, 3], name='i_stop')
    event = _data.var(events[:, 2], name='eventID')
    dataset = _data.dataset(event, istart, istop, name=name)

    if labels:
        dataset.add(_data.factor(events[:, 2], name='event', labels=labels))

    return dataset
def VaR(w, portfolio, factor_list, N):
    """
    Estimate the 5% value at risk of a portfolio by simulation.

    w: weights of the portfolio, one per stock (sequence or column vector);
        they are rescaled to sum to 1
    portfolio: passed on to regression.regression together with N; the
        resulting object supplies ``coef`` and ``simulated_residual``.
        ``coef`` is used as an n x (k+1) array for n stocks and k factors:
        column 0 holds the intercepts, the other columns the betas
    factor_list: list of factors which will be used for simulation;
        input eg:[VIX,EIRX,VOL]
    N: trials of simulation

    Returns the absolute value of the simulated portfolio return found at
    position int(N * 0.05) of the sorted returns.
    """
    # normalize the weights to sum to 1; flattening accepts plain sequences
    # as well as column vectors, and the total is computed only once
    weights = np.asarray(w, dtype=float).ravel()
    total = weights.sum()
    if total == 0:
        raise ZeroDivisionError("portfolio weights sum to zero")
    weights = weights / total

    all_factor = data.factor(factor_list)
    regressor = regression.regression(portfolio, N)

    # coefficients and simulated residuals of the regression
    coef = regressor.coef
    matrix_residuals = regressor.simulated_residual.T

    # simulated factors; should be a 2-D array of k x N, k being the
    # number of factors
    multivariate_normal = all_factor.simulation(N).T

    # stock return = intercept + beta * factor + error
    # assumption: iid error, simulated from iid normal
    # the (n, 1) intercept column broadcasts over the N trials
    matrix_intercept = coef[:, :1]
    matrix_beta = coef[:, 1:]
    stock_return = (matrix_intercept
                    + np.dot(matrix_beta, multivariate_normal)
                    + matrix_residuals)

    # weight every stock and sum over stocks to get the simulated
    # portfolio return of each trial
    portfolio_return = weights[:, np.newaxis] * stock_return
    sum_return = np.sum(portfolio_return, axis=0)
    sort_return = np.sort(sum_return)

    # use the return at the 5% cutoff point as the VaR
    cutoff = int(N * 0.05)
    var = sort_return[cutoff]
    return abs(var)
def __init__(self, Y, X, match=None, sub=None, match_func=np.mean, ds=None):
    """
    Split the dependent measurement Y into the cells defined by X.

    Parameters
    ----------
    Y : var, ndvar
        dependent measurement
    X : categorial
        factor or interaction
    match :
        factor on which cases are matched (i.e. subject for a repeated
        measures comparisons). If several data points with the same
        case fall into one cell of X, they are combined using
        match_func. If match is not None, celltable.groups contains the
        {Xcell -> [match values of data points], ...} mapping
        corresponding to self.data
    sub : bool array
        Bool array of length N specifying which cases to include
    match_func : callable
        see match
    ds : dataset
        If a dataset is specified, input items (Y / X / match / sub) can
        be str instead of data-objects, in which case they will be
        retrieved from the dataset.

    Attributes set
    --------------
    X, Y, sub, match :
        the (converted) arguments
    cells :
        ``X.cells``, or ``[None]`` if X is None
    data, data_indexes : dict
        {cell -> data of Y in that cell} and {cell -> index ``X == cell``}
    groups : dict
        {cell -> match values}; only created if match is used
    within : dict
        {(cell1, cell2) -> whether the groups of both cells are equal};
        only created if match is used and X is not None
    all_within : bool
        not set if X is None

    Examples
    --------
    Split a repeated-measure variable Y into cells defined by the
    interaction of A and B::

        >>> c = celltable(Y, A % B, match=subject)

    """
    # str arguments are retrieved from the dataset
    if isinstance(Y, basestring):
        Y = ds.eval(Y)
    if isinstance(X, basestring):
        X = ds.eval(X)
    if isinstance(match, basestring):
        match = ds[match]
    if isinstance(sub, basestring):
        sub = ds.eval(sub)

    # categorial and ndvar Y are only indexed with sub; anything else is
    # converted to a var
    if _data.iscategorial(Y) or _data.isndvar(Y):
        if sub is not None:
            Y = Y[sub]
    else:
        Y = _data.asvar(Y, sub)

    if X is not None:
        X = _data.ascategorial(X, sub)

    # NOTE(review): relies on the truth value of the match object rather
    # than on ``match is not None`` -- confirm this is intended
    if match:
        match = _data.asfactor(match, sub)
        assert len(match) == len(Y)
        self.groups = {}

    # save args
    self.X = X
    self.Y = Y
    self.sub = sub
    self.match = match

    # extract cells and cell data
    self.data = {}
    self.data_indexes = {}
    if X is None:
        # without X all cases form a single cell with the key None
        self.data[None] = Y
        self.data_indexes[None] = np.ones(len(Y), dtype=bool)
        self.cells = [None]
        return
    self.cells = X.cells

    for cell in self.cells:
        self.data_indexes[cell] = cell_index = X == cell
        newdata = Y[cell_index]
        if match:
            group = match[cell_index]
            values = group.cells

            # sort
            if len(values) < len(group):
                # fewer match values than cases: combine the cases that
                # share a match value using match_func
                newdata = newdata.compress(group, func=match_func)
                group = _data.factor(values, name=group.name)
            else:
                # NOTE(review): group_ids is a list, so the product relies
                # on numpy converting it to an array and broadcasting it
                # against arange; confirm that sort_arg is the intended
                # ordering index
                group_ids = [group == v for v in values]
                sort_arg = np.sum(group_ids * np.arange(len(values)), axis=0)
                newdata = newdata[sort_arg]
                group = group[sort_arg]

            self.groups[cell] = group

        self.data[cell] = newdata

    if match:
        # determine which cells compare values for dependent values on
        # match_variable
#        n_cells = len(self.indexes)
#        self.within = np.empty((n_cells, n_cells), dtype=bool)
        self.within = {}
        for cell1 in self.cells:
            for cell2 in self.cells:
                if cell1 == cell2:
                    pass
                else:
                    v = self.groups[cell1] == self.groups[cell2]
                    # the comparison may yield a plain False (presumably
                    # for groups that can not be compared element-wise --
                    # TODO confirm); anything else is reduced with all()
                    if v is not False:
                        v = all(v)
                    self.within[cell1, cell2] = v
                    self.within[cell2, cell1] = v
        self.all_within = np.all(self.within.values())
    else:
        self.all_within = False
def fiff(raw, events, conditions, varname='condition', dataname='MEG',
         tstart=-.2, tstop=.6, properties=None, name=None, c_colors=None,
         sensorsname='fiff-sensors'):
    """
    Loads data directly when two files (raw and events) are provided
    separately.

    raw : str
        path to the raw file
    events : str
        path to the event file
    conditions : dict
        ID->name dictionary of conditions that should be imported
    varname : str
        variable name that will contain the condition value
    dataname : str
        name of the ndvar holding the data; also used as the default_DV of
        the dataset
    tstart, tstop : scalar
        passed to mne.Epochs as start and end of the epochs
    properties : dict
        set properties in addition to the defaults
    name : str | None
        name of the dataset; if None, the basename of ``raw`` is used
    c_colors : dict | None
        colors passed on to the condition factor (None: empty dict)
    sensorsname : str
        name of the sensor net

    Raises ValueError if ``conditions`` is empty.

    """
    if not conditions:
        # without any condition no epochs are read, so neither the sampling
        # rate nor the time axis would be known
        raise ValueError("conditions must contain at least one event ID")
    if c_colors is None:
        # fresh dict for every call instead of a shared mutable default
        c_colors = {}
    if name is None:
        name = os.path.basename(raw)

    raw = mne.fiff.Raw(raw)

    # parse sensor net: position and name of every channel whose name
    # starts with 'MEG'
    sensor_list = []
    for ch in raw.info['chs']:
        ch_name = ch['ch_name']
        if ch_name.startswith('MEG'):
            x, y, z = ch['loc'][:3]
            sensor_list.append([x, y, z, ch_name])
    sensor_net = sensors.sensor_net(sensor_list, name=sensorsname)

    events = mne.read_events(events)
    picks = mne.fiff.pick_types(raw.info, meg=True, eeg=False, stim=False,
                                eog=False, include=[], exclude=[])

    data = []
    c_x = []

    # read the data, one set of epochs per condition
    for ID in conditions:
        epochs = mne.Epochs(raw, events, ID, tstart, tstop, picks=picks)
        # sampling rate and time axis of the last condition are kept
        samplingrate = epochs.info['sfreq'][0]
        T = epochs.times

        # data
        c_data = epochs.get_data()  # n_ep, n_ch, n_t
        data.extend(epoch.T for epoch in c_data)

        # conditions variable
        c_x.extend([ID] * len(c_data))

    # construct the dataset
    c_factor = _data.factor(c_x, name=varname, labels=conditions,
                            colors=c_colors, retain_label_codes=True)

    props = {'samplingrate': samplingrate}
    props.update(_default_fiff_properties)
    if properties is not None:
        props.update(properties)

    data = np.array(data)

    timevar = _data.var(T, 'time')
    dims = (timevar, sensor_net)

    Y = _data.ndvar(dims, data, properties=props, name=dataname)

    dataset = _data.dataset(Y, c_factor, name=name, default_DV=dataname)
    return dataset
def _try_make_random_factor(name, values, ds, rand, balance, urn,
                            require_exact_balance):
    """
    Make one attempt at creating a balanced (random) factor for ``ds``.

    name : str
        name of the new factor
    values : sequence
        cell labels of the new factor
    ds : dataset
        only ``ds.N`` (the number of cases) is used
    rand : bool
        shuffle the values within every region
    balance :
        if not empty, passed to ``_data.interaction``; the cells of the
        result are the regions within which the values are balanced.
        Otherwise the whole dataset is a single region.
    urn : list
        items indexable by case; the new factor is not allowed to have the
        same value as any of them on the same case
    require_exact_balance : bool
        raise an error if the region length is not a multiple of the number
        of values

    Raises
    ------
    NotImplementedError
        if the regions differ in length, or if urn is used without rand
    ValueError
        if require_exact_balance is set and can not be fulfilled
    RandomizationError
        if a value that conflicts with the urn can not be switched with
        another value of the region; the caller can try again

    """
    N_values = len(values)
    x = np.empty(ds.N, dtype=np.uint8)
    cells = dict(enumerate(values))

    if balance:
        groups = _data.interaction(balance)
        regions = groups.as_factor()

        # for now, they have to be of equal length
        region_lens = [np.sum(regions == cell) for cell in regions.cells]
        if len(np.unique(region_lens)) > 1:
            raise NotImplementedError

        region_len = region_lens[0]
    else:
        regions = _data.factor('?' * ds.N, "regions")
        region_len = ds.N

    # generate value codes with an equal number of each value; the modulo is
    # applied before the cast to uint8 so that long regions do not overflow
    exact_balance = not bool(region_len % N_values)
    if exact_balance:
        values = (np.arange(region_len) % N_values).astype(np.uint8)
    else:
        if require_exact_balance:
            raise ValueError("No exact balancing possible")
        _len = (region_len // N_values + 1) * N_values
        values = (np.arange(_len) % N_values).astype(np.uint8)

        # drop trailing values randomly
        if rand:
            np.random.shuffle(values[-N_values:])
        # one region is filled at a time, so the codes have to match the
        # length of a region (not of the whole dataset)
        values = values[:region_len]

    # cycle through values of the balance containers
    for region in regions.cells:
        if rand:
            np.random.shuffle(values)

        # indexes into the current out array rows
        c_index = (regions == region)
        c_indexes = np.where(c_index)[0]  # location

        if urn:  # the Urn has been drawn from already
            if not rand:
                raise NotImplementedError

            # source (si, into values) and target (ti, into ds) indexes
            for si, ti in enumerate(c_indexes):
                if any(cells[values[si]] == u[ti] for u in urn):
                    # randomized order in which to test other
                    # values for switching
                    switch_order = list(range(region_len))
                    switch_order.pop(si)
                    np.random.shuffle(switch_order)

                    switched = False
                    for si_switch in switch_order:
                        ti_switch = c_indexes[si_switch]
                        # neither value may conflict with the urn at its
                        # new position
                        a = any(cells[values[si]] == u[ti_switch]
                                for u in urn)
                        b = any(cells[values[si_switch]] == u[ti]
                                for u in urn)
                        if not (a or b):
                            values[[si, si_switch]] = values[[si_switch, si]]
                            switched = True
                            break

                    if not switched:
                        msg = "No value found for switching! Try again."
                        raise RandomizationError(msg)

        x[c_index] = values
    return _data.factor(x, name, labels=cells)