def test_mosaic_very_complex(): # make a scattermatrix of mosaic plots to show the correlations between # each pair of variable in a dataset. Could be easily converted into a # new function that does this automatically based on the type of data key_name = ["gender", "age", "health", "work"] key_base = (["male", "female"], ["old", "young"], ["healty", "ill"], ["work", "unemployed"]) keys = list(product(*key_base)) data = OrderedDict(list(zip(keys, list(range(1, 1 + len(keys)))))) props = {} props[("male", "old")] = {"color": "r"} props[("female",)] = {"color": "pink"} L = len(key_base) fig, axes = pylab.subplots(L, L) for i in range(L): for j in range(L): m = set(range(L)).difference(set((i, j))) if i == j: axes[i, i].text(0.5, 0.5, key_name[i], ha="center", va="center") axes[i, i].set_xticks([]) axes[i, i].set_xticklabels([]) axes[i, i].set_yticks([]) axes[i, i].set_yticklabels([]) else: ji = max(i, j) ij = min(i, j) temp_data = OrderedDict([((k[ij], k[ji]) + tuple(k[r] for r in m), v) for k, v in list(data.items())]) keys = list(temp_data.keys()) for k in keys: value = _reduce_dict(temp_data, k[:2]) temp_data[k[:2]] = value del temp_data[k] mosaic(temp_data, ax=axes[i, j], axes_label=False, properties=props, gap=0.05, horizontal=i > j) pylab.suptitle("old males should look bright red, (plot 4 of 4)")
def _normalize_data(data, index): """normalize the data to a dict with tuples of strings as keys right now it works with: 0 - dictionary (or equivalent mappable) 1 - pandas.Series with simple or hierarchical indexes 2 - numpy.ndarrays 3 - everything that can be converted to a numpy array 4 - pandas.DataFrame (via the _normalize_dataframe function) """ # if data is a dataframe we need to take a completely new road # before coming back here. Use the hasattr to avoid importing # pandas explicitly if hasattr(data, 'pivot') and hasattr(data, 'groupby'): data = _normalize_dataframe(data, index) index = None # can it be used as a dictionary? try: items = list(data.items()) except AttributeError: # ok, I cannot use the data as a dictionary # Try to convert it to a numpy array, or die trying data = np.asarray(data) temp = OrderedDict() for idx in np.ndindex(data.shape): name = tuple(i for i in idx) temp[name] = data[idx] data = temp items = list(data.items()) # make all the keys a tuple, even if simple numbers data = OrderedDict([_tuplify(k), v] for k, v in items) categories_levels = _categories_level(list(data.keys())) # fill the void in the counting dictionary indexes = product(*categories_levels) contingency = OrderedDict([(k, data.get(k, 0)) for k in indexes]) data = contingency # reorder the keys order according to the one specified by the user # or if the index is None convert it into a simple list # right now it doesn't do any check, but can be modified in the future index = list(range(len(categories_levels))) if index is None else index contingency = OrderedDict() for key, value in list(data.items()): new_key = tuple(key[i] for i in index) contingency[new_key] = value data = contingency return data
def test_mosaic_very_complex(): # make a scattermatrix of mosaic plots to show the correlations between # each pair of variable in a dataset. Could be easily converted into a # new function that does this automatically based on the type of data key_name = ['gender', 'age', 'health', 'work'] key_base = (['male', 'female'], ['old', 'young'], ['healty', 'ill'], ['work', 'unemployed']) keys = list(product(*key_base)) data = OrderedDict(zip(keys, range(1, 1 + len(keys)))) props = {} props[('male', 'old')] = {'color': 'r'} props[('female', )] = {'color': 'pink'} L = len(key_base) fig, axes = pylab.subplots(L, L) for i in range(L): for j in range(L): m = set(range(L)).difference(set((i, j))) if i == j: axes[i, i].text(0.5, 0.5, key_name[i], ha='center', va='center') axes[i, i].set_xticks([]) axes[i, i].set_xticklabels([]) axes[i, i].set_yticks([]) axes[i, i].set_yticklabels([]) else: ji = max(i, j) ij = min(i, j) temp_data = OrderedDict([((k[ij], k[ji]) + tuple(k[r] for r in m), v) for k, v in data.items()]) keys = temp_data.keys() for k in keys: value = _reduce_dict(temp_data, k[:2]) temp_data[k[:2]] = value del temp_data[k] mosaic(temp_data, ax=axes[i, j], axes_label=False, properties=props, gap=0.05, horizontal=i > j) pylab.suptitle('old males should look bright red, (plot 4 of 4)')
def _normalize_data(data, index): """normalize the data to a dict with tuples of strings as keys right now it works with: 0 - dictionary (or equivalent mappable) 1 - pandas.Series with simple or hierarchical indexes 2 - numpy.ndarrays 3 - everything that can be converted to a numpy array 4 - pandas.DataFrame (via the _normalize_dataframe function) """ # if data is a dataframe we need to take a completely new road # before coming back here. Use the hasattr to avoid importing # pandas explicitly if hasattr(data, 'pivot') and hasattr(data, 'groupby'): data = _normalize_dataframe(data, index) index = None # can it be used as a dictionary? try: items = list(data.iteritems()) except AttributeError: # ok, I cannot use the data as a dictionary # Try to convert it to a numpy array, or die trying data = np.asarray(data) temp = OrderedDict() for idx in np.ndindex(data.shape): name = tuple(i for i in idx) temp[name] = data[idx] data = temp items = data.items() # make all the keys a tuple, even if simple numbers data = OrderedDict([_tuplify(k), v] for k, v in items) categories_levels = _categories_level(list(data.keys())) # fill the void in the counting dictionary indexes = product(*categories_levels) contingency = OrderedDict([(k, data.get(k, 0)) for k in indexes]) data = contingency # reorder the keys order according to the one specified by the user # or if the index is None convert it into a simple list # right now it doesn't do any check, but can be modified in the future index = list(range(len(categories_levels))) if index is None else index contingency = OrderedDict() for key, value in data.items(): new_key = tuple(key[i] for i in index) contingency[new_key] = value data = contingency return data
def summary_model(results): """Create a dict with information about the model """ def time_now(**kwrds): now = datetime.datetime.now() return now.strftime("%Y-%m-%d %H:%M") info = OrderedDict() info["Model:"] = lambda x: x.model.__class__.__name__ info["Model Family:"] = lambda x: x.family.__class.__name__ info["Link Function:"] = lambda x: x.family.link.__class__.__name__ info["Dependent Variable:"] = lambda x: x.model.endog_names info["Date:"] = time_now() info["No. Observations:"] = lambda x: "%#6d" % x.nobs info["Df Model:"] = lambda x: "%#6d" % x.df_model info["Df Residuals:"] = lambda x: "%#6d" % x.df_resid info["Converged:"] = lambda x: x.mle_retvals["converged"] info["No. Iterations:"] = lambda x: x.mle_retvals["iterations"] info["Method:"] = lambda x: x.method info["Norm:"] = lambda x: x.fit_options["norm"] info["Scale Est.:"] = lambda x: x.fit_options["scale_est"] info["Cov. Type:"] = lambda x: x.fit_options["cov"] info["R-squared:"] = lambda x: "%#8.3f" % x.rsquared info["Adj. R-squared:"] = lambda x: "%#8.3f" % x.rsquared_adj info["Pseudo R-squared:"] = lambda x: "%#8.3f" % x.prsquared info["AIC:"] = lambda x: "%8.4f" % x.aic info["BIC:"] = lambda x: "%8.4f" % x.bic info["Log-Likelihood:"] = lambda x: "%#8.5g" % x.llf info["LL-Null:"] = lambda x: "%#8.5g" % x.llnull info["LLR p-value:"] = lambda x: "%#8.5g" % x.llr_pvalue info["Deviance:"] = lambda x: "%#8.5g" % x.deviance info["Pearson chi2:"] = lambda x: "%#6.3g" % x.pearson_chi2 info["F-statistic:"] = lambda x: "%#8.4g" % x.fvalue info["Prob (F-statistic):"] = lambda x: "%#6.3g" % x.f_pvalue info["Scale:"] = lambda x: "%#8.5g" % x.scale out = OrderedDict() for key in info.keys(): try: out[key] = info[key](results) except: pass return out
def summary_model(results): '''Create a dict with information about the model ''' def time_now(**kwrds): now = datetime.datetime.now() return now.strftime('%Y-%m-%d %H:%M') info = OrderedDict() info['Model:'] = lambda x: x.model.__class__.__name__ info['Model Family:'] = lambda x: x.family.__class.__name__ info['Link Function:'] = lambda x: x.family.link.__class__.__name__ info['Dependent Variable:'] = lambda x: x.model.endog_names info['Date:'] = time_now() info['No. Observations:'] = lambda x: "%#6d" % x.nobs info['Df Model:'] = lambda x: "%#6d" % x.df_model info['Df Residuals:'] = lambda x: "%#6d" % x.df_resid info['Converged:'] = lambda x: x.mle_retvals['converged'] info['No. Iterations:'] = lambda x: x.mle_retvals['iterations'] info['Method:'] = lambda x: x.method info['Norm:'] = lambda x: x.fit_options['norm'] info['Scale Est.:'] = lambda x: x.fit_options['scale_est'] info['Cov. Type:'] = lambda x: x.fit_options['cov'] info['R-squared:'] = lambda x: "%#8.3f" % x.rsquared info['Adj. R-squared:'] = lambda x: "%#8.3f" % x.rsquared_adj info['Pseudo R-squared:'] = lambda x: "%#8.3f" % x.prsquared info['AIC:'] = lambda x: "%8.4f" % x.aic info['BIC:'] = lambda x: "%8.4f" % x.bic info['Log-Likelihood:'] = lambda x: "%#8.5g" % x.llf info['LL-Null:'] = lambda x: "%#8.5g" % x.llnull info['LLR p-value:'] = lambda x: "%#8.5g" % x.llr_pvalue info['Deviance:'] = lambda x: "%#8.5g" % x.deviance info['Pearson chi2:'] = lambda x: "%#6.3g" % x.pearson_chi2 info['F-statistic:'] = lambda x: "%#8.4g" % x.fvalue info['Prob (F-statistic):'] = lambda x: "%#6.3g" % x.f_pvalue info['Scale:'] = lambda x: "%#8.5g" % x.scale out = OrderedDict() for key in info.keys(): try: out[key] = info[key](results) except: pass return out
def summary_model(results): '''Create a dict with information about the model ''' def time_now(**kwrds): now = datetime.datetime.now() return now.strftime('%Y-%m-%d %H:%M') info = OrderedDict() info['Model:'] = lambda x: x.model.__class__.__name__ info['Model Family:'] = lambda x: x.family.__class.__name__ info['Link Function:'] = lambda x: x.family.link.__class__.__name__ info['Dependent Variable:'] = lambda x: x.model.endog_names info['Date:'] = time_now() info['No. Observations:'] = lambda x: "%#6d" % x.nobs info['Df Model:'] = lambda x: "%#6d" % x.df_model info['Df Residuals:'] = lambda x: "%#6d" % x.df_resid info['Converged:'] = lambda x: x.mle_retvals['converged'] info['No. Iterations:'] = lambda x: x.mle_retvals['iterations'] info['Method:'] = lambda x: x.method info['Norm:'] = lambda x: x.fit_options['norm'] info['Scale Est.:'] = lambda x: x.fit_options['scale_est'] info['Cov. Type:'] = lambda x: x.fit_options['cov'] info['R-squared:'] = lambda x: "%#8.3f" % x.rsquared info['Adj. R-squared:'] = lambda x: "%#8.3f" % x.rsquared_adj info['Pseudo R-squared:'] = lambda x: "%#8.3f" % x.prsquared info['AIC:'] = lambda x: "%8.4f" % x.aic info['BIC:'] = lambda x: "%8.4f" % x.bic info['Log-Likelihood:'] = lambda x: "%#8.5g" % x.llf info['LL-Null:'] = lambda x: "%#8.5g" % x.llnull info['LLR p-value:'] = lambda x: "%#8.5g" % x.llr_pvalue info['Deviance:'] = lambda x: "%#8.5g" % x.deviance info['Pearson chi2:'] = lambda x: "%#6.3g" % x.pearson_chi2 info['F-statistic:'] = lambda x: "%#8.4g" % x.fvalue info['Prob (F-statistic):'] = lambda x: "%#6.3g" % x.f_pvalue info['Scale:'] = lambda x: "%#8.5g" % x.scale out = OrderedDict() for key in list(info.keys()): try: out[key] = info[key](results) except: pass return out
def test_mosaic_very_complex(): # make a scattermatrix of mosaic plots to show the correlations between # each pair of variable in a dataset. Could be easily converted into a # new function that does this automatically based on the type of data key_name = ['gender', 'age', 'health', 'work'] key_base = (['male', 'female'], ['old', 'young'], ['healty', 'ill'], ['work', 'unemployed']) keys = list(product(*key_base)) data = OrderedDict(zip(keys, range(1, 1 + len(keys)))) props = {} props[('male', 'old')] = {'color': 'r'} props[('female',)] = {'color': 'pink'} L = len(key_base) fig, axes = pylab.subplots(L, L) for i in range(L): for j in range(L): m = set(range(L)).difference(set((i, j))) if i == j: axes[i, i].text(0.5, 0.5, key_name[i], ha='center', va='center') axes[i, i].set_xticks([]) axes[i, i].set_xticklabels([]) axes[i, i].set_yticks([]) axes[i, i].set_yticklabels([]) else: ji = max(i, j) ij = min(i, j) temp_data = OrderedDict([((k[ij], k[ji]) + tuple(k[r] for r in m), v) for k, v in data.items()]) keys = temp_data.keys() for k in keys: value = _reduce_dict(temp_data, k[:2]) temp_data[k[:2]] = value del temp_data[k] mosaic(temp_data, ax=axes[i, j], axes_label=False, properties=props, gap=0.05, horizontal=i > j) pylab.suptitle('old males should look bright red, (plot 4 of 4)')