def chain_dot(*arrs):
    """
    Multiply a sequence of matrices together with ``np.dot``.

    The product is accumulated starting from the right-most argument, so
    ``chain_dot(A, B, C)`` is evaluated as ``np.dot(A, np.dot(B, C))``.

    Parameters
    ----------
    arrs : argument list of ndarray

    Returns
    -------
    Dot product of all arguments. A single argument is returned as is.

    Examples
    --------
    >>> import numpy as np
    >>> from statsmodels.tools import chain_dot
    >>> A = np.arange(1,13).reshape(3,4)
    >>> B = np.arange(3,15).reshape(4,3)
    >>> C = np.arange(5,8).reshape(3,1)
    >>> chain_dot(A,B,C)
    array([[1820],
           [4300],
           [6780]])
    """
    def _premultiply(partial, left):
        # `partial` holds the product of everything to the right of `left`.
        return np.dot(left, partial)

    return reduce(_premultiply, reversed(arrs))
def add_dict(self, d, ncols=2, align='l', float_format="%.4f"):
    '''Append the key/value pairs of a dict to the summary as a table

    Parameters
    ----------
    d : dict
        Keys and values are passed through ``_formatter`` together with
        ``float_format``. Users are encouraged to format them before
        using add_dict.
    ncols: int
        Number of key/value column pairs in the output table
    align : string
        Data alignment (l/c/r)
    float_format : string
        Format string handed to ``_formatter`` for keys and values
    '''
    labels = [_formatter(key, float_format) for key in iterkeys(d)]
    entries = [_formatter(value, float_format) for value in itervalues(d)]
    table = np.array(lzip(labels, entries))

    # Pad with blank rows so the rows divide evenly into `ncols` chunks.
    leftover = table.shape[0] % ncols
    if leftover != 0:
        blanks = np.array((ncols - leftover) * [['', '']])
        table = np.vstack([table, blanks])

    # Cut the tall two-column table into `ncols` pieces laid side by side.
    pieces = np.split(table, ncols)
    self.add_array(np.hstack(pieces), align=align)
def safe_version(module, attr='__version__'):
    """
    Look up a (possibly nested) version attribute on ``module``.

    Parameters
    ----------
    module : object
        Object on which the attribute lookup starts.
    attr : str or list of str
        Attribute name, or a list of names that are resolved one after
        the other (``['a', 'b']`` resolves ``module.a.b``).

    Returns
    -------
    The resolved attribute, or the string "Cannot detect version" if any
    lookup raises AttributeError.
    """
    names = attr if isinstance(attr, list) else [attr]
    try:
        found = module
        for name in names:
            found = getattr(found, name)
    except AttributeError:
        return "Cannot detect version"
    return found
def summary_col(results, float_format='%.4f', model_names=[], stars=True,
                more_info=None, regressor_order=[],show='t',title=None):
    """
    Build the side-by-side parameter table for one or more results.

    NOTE(review): the visible body stops after the per-model info frames
    are created; nothing is returned and `title` is never used here. The
    remainder of the function appears to be missing from this snippet.

    Parameters
    ----------
    results : results instance or list of results instances
        A single instance is wrapped in a list.
    float_format : string
        Forwarded to ``_col_params``.
    model_names : list of strings
        Column names; made unique with ``_make_unique``. If empty, the
        first column name of each parameter frame is used.
    stars : bool
        Forwarded to ``_col_params``.
    more_info : forwarded unchanged to ``_col_info``.
    regressor_order : list of strings
        Regressors to list first; defaults to ``['const']`` when empty.
    show : forwarded to ``_col_params``.
    title : unused in the visible part of the function.
    """
    if not isinstance(results, list):
        results = [results]

    cols = [_col_params(x, stars=stars, float_format=float_format,show=show) for x in results]

    # Unique column names (pandas has problems merging otherwise)
    if model_names:
        colnames = _make_unique(model_names)
    else:
        colnames = _make_unique([x.columns[0] for x in cols])
    for i in range(len(cols)):
        cols[i].columns = [colnames[i]]

    # Outer-join all per-model frames on their index.
    merg = lambda x, y: x.merge(y, how='outer', right_index=True, left_index=True)
    summ = reduce(merg, cols)

    # if regressor_order:
    # Unlike the guarded variant commented out above, reordering always
    # runs; an empty regressor_order falls back to putting 'const' first.
    if not regressor_order:
        regressor_order = ['const']

    varnames = summ.index.get_level_values(0).tolist()
    ordered = [x for x in regressor_order if x in varnames]
    unordered = [x for x in varnames if x not in regressor_order + ['']]

    # Note: np.unique can disrupt the original order of list 'unordered'.
    # Then pd.Series().unique() works well.
    # order = ordered + list(np.unique(unordered))
    order = ordered + list(pd.Series(unordered).unique())

    # f expands each name into its two row labels: name+'coef', name+'stde'.
    f = lambda idx: sum([[x + 'coef', x + 'stde'] for x in idx], [])
    # summ.index = f(np.unique(varnames))
    summ.index = f(pd.Series(varnames).unique())
    summ = summ.reindex(f(order))
    # Strip the 4-character 'coef'/'stde' suffix again.
    summ.index = [x[:-4] for x in summ.index]

    # Blank the label of every second row (odd positions).
    idx = pd.Series(lrange(summ.shape[0])) % 2 == 1
    summ.index = np.where(idx, '', summ.index.get_level_values(0))
    summ = summ.fillna('')

    # add infos about the models.
    # Earlier info_dict-based variant, kept commented out by the author:
    # if info_dict:
    #     cols = [_col_info(x, info_dict.get(x.model.__class__.__name__,
    #                                        info_dict)) for x in results]
    # else:
    #     cols = [_col_info(x, getattr(x, "default_model_infos", None)) for x in
    #             results]
    cols = [_col_info(x,more_info=more_info) for x in results]
def _nan_rows(*arrs):
    """
    Flag the rows that contain a NaN in any of the given 2d inputs.

    Returns a boolean array which is True where the corresponding row of
    any array in `arrs` is null. Inputs can be any mixture of Series,
    DataFrames or array-like.
    """
    if len(arrs) == 1:
        # reduce needs two operands; pair a lone input with an all-False one
        arrs = arrs + ([[False]],)

    def _combine(previous, current):
        # After the first step `previous` is already a boolean mask that has
        # to be carried along. Check for dtype because a DataFrame has
        # dtypes (plural) and must not be mistaken for such a mask.
        carried = False
        if hasattr(previous, 'dtype') and previous.dtype == bool:
            carried = previous
        return np.logical_or(_asarray_2d_null_rows(previous),
                             (carried | _asarray_2d_null_rows(current)))

    mask = reduce(_combine, arrs)
    return mask.squeeze()
# Script fragment: numerical checks on the covariance matrix `omega`.
# `omega`, `nobs`, `sigmagr`, `sigmape`, `sigma`, `groups`, `periods`,
# `dummygr` and `repanel_cov` are defined outside this fragment.
print(np.linalg.cholesky(omega))
ev, evec = np.linalg.eigh(omega)  #eig doesn't work
# Inverse from the eigendecomposition: evec * diag(1/ev) * evec.T
omegainv = np.dot(evec, (1 / ev * evec).T)
# Direct inverse; not used again within this fragment.
omegainv2 = np.linalg.inv(omega)
# Recompose omega from its eigenpairs: evec * diag(ev) * evec.T
omegacomp = np.dot(evec, (ev * evec).T)
print(np.max(np.abs(omegacomp - omega)))
#check
#print(np.dot(omegainv,omega)
# Largest deviation of omegainv * omega from the identity.
print(np.max(np.abs(np.dot(omegainv, omega) - np.eye(nobs))))
# Scales column j of evec by 1/sqrt(ev[j]); the next print compares
# omegainvhalf * omegainvhalf.T against omegainv.
omegainvhalf = evec / np.sqrt(ev)  #not sure whether ev shouldn't be column
print(np.max(np.abs(np.dot(omegainvhalf, omegainvhalf.T) - omegainv)))

# now we can use omegainvhalf in GLS (instead of the cholesky)

# Compare the three matrices returned by repanel_cov with the ones
# computed above (max absolute difference each).
sigmas2 = np.array([sigmagr, sigmape, sigma])
groups2 = np.column_stack((groups, periods))
omega_, omegainv_, omegainvhalf_ = repanel_cov(groups2, sigmas2)
print(np.max(np.abs(omega_ - omega)))
print(np.max(np.abs(omegainv_ - omegainv)))
print(np.max(np.abs(omegainvhalf_ - omegainvhalf)))

# notation Baltagi (3rd) section 9.4.1 (Fixed Effects Model)
# Pgr = D (D'D)^{-1} D' with D = dummygr; Qgr = I - Pgr
Pgr = reduce(
    np.dot,
    [dummygr, np.linalg.inv(np.dot(dummygr.T, dummygr)), dummygr.T])
Qgr = np.eye(nobs) - Pgr
# within group effect: np.dot(Qgr, groups)
# but this is not memory efficient, compared to groupstats
print(np.max(np.abs(np.dot(Qgr, groups))))
def summary_col(results, float_format='%.4f', model_names=[], stars=False,
                info_dict=None, regressor_order=[]):
    """
    Summarize multiple results instances side-by-side (coefs and SEs)

    Parameters
    ----------
    results : statsmodels results instance or list of result instances
    float_format : string
        float format for coefficients and standard errors
        Default : '%.4f'
    model_names : list of strings of length len(results)
        If the names are not unique, a roman number will be appended to
        all model names.
    stars : bool
        print significance stars
    info_dict : dict
        dict of lambda functions to be applied to results instances to
        retrieve model info. To use specific information for different
        models, add a (nested) info_dict with model name as the key.
        Example: `info_dict = {"N":..., "R2": ..., "OLS":{"R2":...}}` would
        only show `R2` for OLS regression models, but additionally `N` for
        all other results.
        Default : None (use the info_dict specified in
        result.default_model_infos, if this property exists)
    regressor_order : list of strings
        list of names of the regressors in the desired order. All
        regressors not specified will be appended to the end of the list
        (sorted by name).

    Returns
    -------
    Summary
        Table holding the parameter rows followed by the model info rows,
        plus explanatory text lines.
    """
    def _outer_merge(left, right):
        # Outer join on the index keeps every row of both frames.
        return left.merge(right, how='outer', right_index=True,
                          left_index=True)

    if not isinstance(results, list):
        results = [results]

    cols = [_col_params(x, stars=stars, float_format=float_format)
            for x in results]

    # Unique column names (pandas has problems merging otherwise)
    if model_names:
        colnames = _make_unique(model_names)
    else:
        colnames = _make_unique([x.columns[0] for x in cols])
    for i, col in enumerate(cols):
        col.columns = [colnames[i]]

    summ = reduce(_outer_merge, cols)

    if regressor_order:
        # Accept any sequence (e.g. a tuple); list concatenation below
        # would otherwise raise TypeError.
        regressor_order = list(regressor_order)
        varnames = summ.index.get_level_values(0).tolist()
        ordered = [x for x in regressor_order if x in varnames]
        excluded = regressor_order + ['']
        unordered = [x for x in varnames if x not in excluded]
        order = ordered + list(np.unique(unordered))

        # Each regressor occupies two rows: its coefficient and its
        # standard error.
        f = lambda idx: sum([[x + 'coef', x + 'stde'] for x in idx], [])
        # Relabel the rows in their CURRENT order. np.unique would sort the
        # names and attach them to the wrong rows whenever the index is not
        # already alphabetical (e.g. for a single, unmerged result).
        summ.index = f(pd.Series(varnames).unique())
        summ = summ.reindex(f(order))
        # Drop the 4-character 'coef'/'stde' suffix again.
        summ.index = [x[:-4] for x in summ.index]

    # Show the regressor name only on the coefficient row of each pair.
    idx = pd.Series(lrange(summ.shape[0])) % 2 == 1
    summ.index = np.where(idx, '', summ.index.get_level_values(0))

    # add infos about the models.
    if info_dict:
        cols = [_col_info(x, info_dict.get(x.model.__class__.__name__,
                                           info_dict))
                for x in results]
    else:
        cols = [_col_info(x, getattr(x, "default_model_infos", None))
                for x in results]
    # use unique column names, otherwise the merge will not succeed
    for df, name in zip(cols, _make_unique([df.columns[0] for df in cols])):
        df.columns = [name]
    info = reduce(_outer_merge, cols)

    # Stack parameter rows on top of the info rows.
    dat = pd.DataFrame(np.vstack([summ, info]))  # pd.concat better, but error
    dat.columns = summ.columns
    dat.index = pd.Index(summ.index.tolist() + info.index.tolist())
    summ = dat

    summ = summ.fillna('')

    smry = Summary()
    smry.add_df(summ, header=True, align='l')
    smry.add_text('Standard errors in parentheses.')
    if stars:
        smry.add_text('* p<.1, ** p<.05, ***p<.01')

    return smry
print(np.max(np.abs(omegacomp - omega))) #check #print(np.dot(omegainv,omega) print(np.max(np.abs(np.dot(omegainv,omega) - np.eye(nobs)))) omegainvhalf = evec/np.sqrt(ev) #not sure whether ev shouldn't be column print(np.max(np.abs(np.dot(omegainvhalf,omegainvhalf.T) - omegainv))) # now we can use omegainvhalf in GLS (instead of the cholesky) sigmas2 = np.array([sigmagr, sigmape, sigma]) groups2 = np.column_stack((groups, periods)) omega_, omegainv_, omegainvhalf_ = repanel_cov(groups2, sigmas2) print(np.max(np.abs(omega_ - omega))) print(np.max(np.abs(omegainv_ - omegainv))) print(np.max(np.abs(omegainvhalf_ - omegainvhalf))) # notation Baltagi (3rd) section 9.4.1 (Fixed Effects Model) Pgr = reduce(np.dot,[dummygr, np.linalg.inv(np.dot(dummygr.T, dummygr)),dummygr.T]) Qgr = np.eye(nobs) - Pgr # within group effect: np.dot(Qgr, groups) # but this is not memory efficient, compared to groupstats print(np.max(np.abs(np.dot(Qgr, groups))))
def _add_datetimes(dates): return reduce(lambda x, y: y + x, dates)
def summary_col(results, float_format='%.4f', model_names=[], stars=True,
                more_info=None, regressor_order=[], show='t', title=None):
    """
    Summarize multiple results instances side-by-side

    Compared with the earlier variant of this function, the author added
    the `show` and `title` parameters, made `stars` default to True,
    replaced the dict parameter `info_dict` by `more_info`, and made an
    empty `regressor_order` fall back to ``['const']``.

    Parameters
    ----------
    results : statsmodels results instance or list of result instances
    float_format : string
        float format for coefficients and standard errors
        Default : '%.4f'
    model_names : list of strings of length len(results)
        If the names are not unique, a roman number will be appended to
        all model names.
    stars : bool
        print significance stars
    more_info : optional
        Forwarded unchanged to ``_col_info`` for every result (a list,
        according to the author's note - TODO confirm against
        ``_col_info``).
    regressor_order : list of strings
        list of names of the regressors in the desired order. All
        regressors not specified will be appended to the end of the list
        in their order of appearance. If empty, ``['const']`` is used.
    show : {'t', 'se', 'p'}
        Selects the footnote text (t statistics, standard errors or
        p-values in parentheses); also forwarded to ``_col_params``.
    title : optional
        Table title, converted with ``str``. Default '\\t Results Summary'.

    Returns
    -------
    Summary
        Holds four tables: title, parameters, model information, notes.

    Raises
    ------
    ValueError
        If `show` is not one of 't', 'se' or 'p'.
    """
    # Footnote per `show` value. The original compared with `is`, which
    # tests object identity and is unreliable for strings, and it left the
    # note undefined for unknown values.
    note_by_show = {
        't': '\t t statistics in parentheses.',
        'se': '\t Std. error in parentheses.',
        'p': '\t pvalues in parentheses.',
    }
    if show not in note_by_show:
        raise ValueError("show must be one of 't', 'se' or 'p', got %r"
                         % (show,))

    def _outer_merge(left, right):
        # Outer join on the index keeps every row of both frames.
        return left.merge(right, how='outer', right_index=True,
                          left_index=True)

    if not isinstance(results, list):
        results = [results]

    cols = [_col_params(x, stars=stars, float_format=float_format, show=show)
            for x in results]

    # Unique column names (pandas has problems merging otherwise)
    if model_names:
        colnames = _make_unique(model_names)
    else:
        colnames = _make_unique([x.columns[0] for x in cols])
    for i, col in enumerate(cols):
        col.columns = [colnames[i]]

    summ = reduce(_outer_merge, cols)

    # Reordering always runs; by default the constant is listed first.
    # list() accepts any sequence (e.g. a tuple) for the concatenation below.
    regressor_order = list(regressor_order) if regressor_order else ['const']

    varnames = summ.index.get_level_values(0).tolist()
    ordered = [x for x in regressor_order if x in varnames]
    excluded = regressor_order + ['']
    unordered = [x for x in varnames if x not in excluded]

    # Note: np.unique can disrupt the original order of list 'unordered'.
    # pd.Series().unique() keeps the order of first appearance.
    order = ordered + list(pd.Series(unordered).unique())

    # Each regressor occupies two rows, labelled name+'coef' / name+'stde'.
    f = lambda idx: sum([[x + 'coef', x + 'stde'] for x in idx], [])
    summ.index = f(pd.Series(varnames).unique())
    summ = summ.reindex(f(order))
    # Drop the 4-character 'coef'/'stde' suffix again.
    summ.index = [x[:-4] for x in summ.index]

    # Show the regressor name only on the first row of each pair.
    idx = pd.Series(lrange(summ.shape[0])) % 2 == 1
    summ.index = np.where(idx, '', summ.index.get_level_values(0))
    summ = summ.fillna('')

    # add infos about the models.
    cols = [_col_info(x, more_info=more_info) for x in results]
    # use unique column names, otherwise the merge will not succeed
    for df, name in zip(cols, _make_unique([df.columns[0] for df in cols])):
        df.columns = [name]
    info = reduce(_outer_merge, cols)
    info.columns = summ.columns
    info = info.fillna('')

    note = [note_by_show[show]]
    if stars:
        note += ['\t * p<.1, ** p<.05, ***p<.01']
    # The extra text is placed in the index of an otherwise empty frame;
    # the author found this prints better than placing it in a column.
    note_df = pd.DataFrame([], index=['note:'] + note,
                           columns=summ.columns).fillna('')

    if title is not None:
        title = str(title)
    else:
        title = '\t Results Summary'
    # The title is likewise carried by the index of an empty frame; the
    # author's attempt to centre it over the columns did not print well.
    title_df = pd.DataFrame([], index=[title],
                            columns=summ.columns).fillna('')

    smry = Summary()
    smry.add_df(title_df, header=False, align='l')
    smry.add_df(summ, header=True, align='l')
    smry.add_df(info, header=False, align='l')
    smry.add_df(note_df, header=False, align='l')
    return smry
# add infos about the models. # if info_dict: # cols = [_col_info(x, info_dict.get(x.model.__class__.__name__, # info_dict)) for x in results] # else: # cols = [_col_info(x, getattr(x, "default_model_infos", None)) for x in # results] cols = [_col_info(x,more_info=more_info) for x in results] # use unique column names, otherwise the merge will not succeed for df , name in zip(cols, _make_unique([df.columns[0] for df in cols])): df.columns = [name] merg = lambda x, y: x.merge(y, how='outer', right_index=True, left_index=True) info = reduce(merg, cols) info.columns = summ.columns info = info.fillna('') # dat = pd.DataFrame(np.vstack([summ, info])) # pd.concat better, but error # dat.columns = summ.columns # dat.index = pd.Index(summ.index.tolist() + info.index.tolist()) # summ = dat # summ = summ.fillna('') # smry = Summary() # smry.add_df(summ, header=True, align='l') # smry.add_text('Standard errors in parentheses.') # if stars: # smry.add_text('* p<.1, ** p<.05, ***p<.01')*p<.01') # return smry
def summary_col(results, float_format='%.4f', model_names=(), stars=False,
                info_dict=None, regressor_order=(), drop_omitted=False):
    """
    Summarize multiple results instances side-by-side (coefs and SEs)

    Parameters
    ----------
    results : statsmodels results instance or list of result instances
    float_format : string, optional
        float format for coefficients and standard errors
        Default : '%.4f'
    model_names : list of strings, optional
        Must have same length as the number of results. If the names are
        not unique, a roman number will be appended to all model names
    stars : bool
        print significance stars
    info_dict : dict
        dict of functions to be applied to results instances to retrieve
        model info. To use specific information for different models, add a
        (nested) info_dict with model name as the key.
        Example: `info_dict = {"N":..., "R2": ..., "OLS":{"R2":...}}` would
        only show `R2` for OLS regression models, but additionally `N` for
        all other results.
        Default : None (use the info_dict specified in
        result.default_model_infos, if this property exists)
    regressor_order : list of strings, optional
        list of names of the regressors in the desired order. All
        regressors not specified will be appended to the end of the list
        (sorted by name).
    drop_omitted : bool, optional
        Controls regressors that are not listed in regressor_order.
        If False, they are appended to the end of the list.
        If True, only regressors listed in regressor_order are included.
        Has no effect when regressor_order is empty.

    Returns
    -------
    Summary
        Table holding the parameter rows followed by the model info rows,
        plus explanatory text lines.
    """
    def _outer_merge(left, right):
        # Outer join on the index keeps every row of both frames.
        return left.merge(right, how='outer', right_index=True,
                          left_index=True)

    if not isinstance(results, list):
        results = [results]

    cols = [_col_params(x, stars=stars, float_format=float_format)
            for x in results]

    # Unique column names (pandas has problems merging otherwise)
    if model_names:
        colnames = _make_unique(model_names)
    else:
        colnames = _make_unique([x.columns[0] for x in cols])
    for i, col in enumerate(cols):
        col.columns = [colnames[i]]

    summ = reduce(_outer_merge, cols)

    if regressor_order:
        # The default is a tuple, so a tuple must work too; list
        # concatenation below would otherwise raise TypeError.
        regressor_order = list(regressor_order)
        varnames = summ.index.get_level_values(0).tolist()
        ordered = [x for x in regressor_order if x in varnames]
        excluded = regressor_order + ['']
        unordered = [x for x in varnames if x not in excluded]
        order = ordered + list(np.unique(unordered))

        # Each regressor occupies two rows: its coefficient and its
        # standard error.
        f = lambda idx: sum([[x + 'coef', x + 'stde'] for x in idx], [])
        # Relabel the rows in their CURRENT order. np.unique would sort the
        # names and attach them to the wrong rows whenever the index is not
        # already alphabetical (e.g. for a single, unmerged result).
        summ.index = f(pd.Series(varnames).unique())
        summ = summ.reindex(f(order))
        # Drop the 4-character 'coef'/'stde' suffix again.
        summ.index = [x[:-4] for x in summ.index]
        if drop_omitted:
            # Select only requested regressors that exist in some model;
            # indexing with absent names would raise KeyError.
            summ = summ.loc[ordered]

    # Show the regressor name only on the coefficient row of each pair.
    idx = pd.Series(lrange(summ.shape[0])) % 2 == 1
    summ.index = np.where(idx, '', summ.index.get_level_values(0))

    # add infos about the models.
    if info_dict:
        cols = [_col_info(x, info_dict.get(x.model.__class__.__name__,
                                           info_dict))
                for x in results]
    else:
        cols = [_col_info(x, getattr(x, "default_model_infos", None))
                for x in results]
    # use unique column names, otherwise the merge will not succeed
    for df, name in zip(cols, _make_unique([df.columns[0] for df in cols])):
        df.columns = [name]
    info = reduce(_outer_merge, cols)

    # Stack parameter rows on top of the info rows.
    dat = pd.DataFrame(np.vstack([summ, info]))  # pd.concat better, but error
    dat.columns = summ.columns
    dat.index = pd.Index(summ.index.tolist() + info.index.tolist())
    summ = dat

    summ = summ.fillna('')

    smry = Summary()
    smry._merge_latex = True
    smry.add_df(summ, header=True, align='l')
    smry.add_text('Standard errors in parentheses.')
    if stars:
        smry.add_text('* p<.1, ** p<.05, ***p<.01')

    return smry
def _add_datetimes(dates): return reduce(lambda x, y: y+x, dates)
def summary_col(results,
                float_format='%.4f',
                model_names=[],
                stars=True,
                more_info=None,
                regressor_order=[],
                show='t',
                title=None):
    """
    Summarize multiple results instances side-by-side

    Parameters
    ----------
    results : results instance or list of results instances
        A single instance is wrapped in a list.
    float_format : string
        Forwarded to ``_col_params``. Default : '%.4f'
    model_names : list of strings of length len(results)
        Column names, made unique with ``_make_unique``. If empty, the
        first column name of each parameter frame is used.
    stars : bool
        Forwarded to ``_col_params``; also adds the significance legend to
        the notes.
    more_info : optional
        Forwarded unchanged to ``_col_info`` for every result.
    regressor_order : list of strings
        Regressors to list first, in this order. All other regressors
        follow in their order of appearance. If empty, ``['const']`` is
        used.
    show : {'t', 'se', 'p'}
        Selects the footnote text (t statistics, standard errors or
        p-values in parentheses); also forwarded to ``_col_params``.
    title : optional
        Table title, converted with ``str``. Default '\\t Results Summary'.

    Returns
    -------
    Summary
        Holds four tables: title, parameters, model information, notes.

    Raises
    ------
    ValueError
        If `show` is not one of 't', 'se' or 'p'.
    """
    # Footnote per `show` value. The original compared with `is`, which
    # tests object identity and is unreliable for strings, and it left the
    # note undefined for unknown values.
    note_by_show = {
        't': '\t t statistics in parentheses.',
        'se': '\t Std. error in parentheses.',
        'p': '\t pvalues in parentheses.',
    }
    if show not in note_by_show:
        raise ValueError("show must be one of 't', 'se' or 'p', got %r"
                         % (show,))

    def _outer_merge(left, right):
        # Outer join on the index keeps every row of both frames.
        return left.merge(
            right, how='outer', right_index=True, left_index=True)

    if not isinstance(results, list):
        results = [results]

    cols = [
        _col_params(x, stars=stars, float_format=float_format, show=show)
        for x in results
    ]

    # Unique column names (pandas has problems merging otherwise)
    if model_names:
        colnames = _make_unique(model_names)
    else:
        colnames = _make_unique([x.columns[0] for x in cols])
    for i, col in enumerate(cols):
        col.columns = [colnames[i]]

    summ = reduce(_outer_merge, cols)

    # Reordering always runs; by default the constant is listed first.
    # list() accepts any sequence (e.g. a tuple) for the concatenation below.
    regressor_order = list(regressor_order) if regressor_order else ['const']

    varnames = summ.index.get_level_values(0).tolist()
    ordered = [x for x in regressor_order if x in varnames]
    excluded = regressor_order + ['']
    unordered = [x for x in varnames if x not in excluded]

    # Note: np.unique can disrupt the original order of list 'unordered'.
    # pd.Series().unique() keeps the order of first appearance.
    order = ordered + list(pd.Series(unordered).unique())

    # Each regressor occupies two rows, labelled name+'coef' / name+'stde'.
    f = lambda idx: sum([[x + 'coef', x + 'stde'] for x in idx], [])
    summ.index = f(pd.Series(varnames).unique())
    summ = summ.reindex(f(order))
    # Drop the 4-character 'coef'/'stde' suffix again.
    summ.index = [x[:-4] for x in summ.index]

    # Show the regressor name only on the first row of each pair.
    idx = pd.Series(lrange(summ.shape[0])) % 2 == 1
    summ.index = np.where(idx, '', summ.index.get_level_values(0))
    summ = summ.fillna('')

    # add infos about the models.
    cols = [_col_info(x, more_info=more_info) for x in results]

    # use unique column names, otherwise the merge will not succeed
    for df, name in zip(cols, _make_unique([df.columns[0] for df in cols])):
        df.columns = [name]

    info = reduce(_outer_merge, cols)
    info.columns = summ.columns
    info = info.fillna('')

    note = [note_by_show[show]]
    if stars:
        note += ['\t * p<.1, ** p<.05, ***p<.01']
    # The extra text is placed in the index of an otherwise empty frame;
    # the author found this prints better than placing it in a column.
    note_df = pd.DataFrame([], index=['note:'] + note,
                           columns=summ.columns).fillna('')

    if title is not None:
        title = str(title)
    else:
        title = '\t Results Summary'
    # The title is likewise carried by the index of an empty frame; the
    # author's attempt to centre it over the columns did not print well.
    title_df = pd.DataFrame([], index=[title],
                            columns=summ.columns).fillna('')

    smry = Summary()
    smry.add_df(title_df, header=False, align='l')  # title DF
    smry.add_df(summ, header=True, align='l')  # params DF
    smry.add_df(info, header=False, align='l')  # model information DF
    smry.add_df(note_df, header=False, align='l')  # extra text DF
    return smry