def default(self, o):
    """Turn ``o`` into a plain dict that downstream serialization can handle.

    * ``set``           -> ``{'__set__': [...]}``
    * ``numpy.ndarray`` -> ``{'__numpy__': _to_default_list(o)}``
    * anything else     -> ``{'_repr_brtc_': ..., '__pickled__': [...]}``, where
      the repr is built from ``o._repr_html_()`` if available, from the
      object itself via ``addPlt`` if it exposes ``savefig``, and from
      ``str(o)`` otherwise.  ``'__pickled__'`` holds the bytes of
      ``pickle.dumps(o)`` as a list of ints.

    NOTE(review): the signature looks like a ``json.JSONEncoder.default``
    override -- confirm against the enclosing class.
    """
    # TODO add more support types

    def _repr_and_pickle(populate):
        # Shared tail of the three fallback branches: build the repr first,
        # then pickle the object.
        builder = BrtcReprBuilder()
        populate(builder)
        return {
            '_repr_brtc_': builder.get(),
            '__pickled__': list(pickle.dumps(o)),
        }

    if isinstance(o, set):
        return {'__set__': list(o)}
    if isinstance(o, numpy.ndarray):
        return {'__numpy__': _to_default_list(o)}
    if hasattr(o, '_repr_html_'):
        return _repr_and_pickle(lambda b: b.addHTML(o._repr_html_()))
    if hasattr(o, 'savefig'):
        return _repr_and_pickle(lambda b: b.addPlt(o))
    return _repr_and_pickle(lambda b: b.addRawTextMD(str(o)))
def _profile_table(table, bins=10, check_correlation=False, correlation_threshold=0.9,
                   correlation_overrides=None):
    """Create a profiling report for ``table`` and wrap its HTML for display.

    ``bins``, ``check_correlation``, ``correlation_threshold`` and
    ``correlation_overrides`` are forwarded unchanged to
    ``pd_profiling.ProfileReport``.

    Returns ``{'result': {'_repr_brtc_': <builder output>}}``.
    """
    builder = BrtcReprBuilder()
    report_options = {
        'bins': bins,
        'check_correlation': check_correlation,
        'correlation_threshold': correlation_threshold,
        'correlation_overrides': correlation_overrides,
    }
    report = pd_profiling.ProfileReport(table, **report_options)
    builder.addHTML(report.html)
    return {'result': {'_repr_brtc_': builder.get()}}
def _tukeys_range_test(table, response_cols, factor_col, alpha=0.05):
    """Run ``pairwise_tukeyhsd`` for every response column against ``factor_col``.

    For each column in ``response_cols`` the report receives a heading, the
    HTML of the test's result table and the simultaneous-comparison plot.

    Returns ``{'result': {'_repr_brtc_': <builder output>}}``.
    """
    report = BrtcReprBuilder()
    report.addMD("""## Tukey's range test Result""")

    for col in response_cols:
        tukey = pairwise_tukeyhsd(table[col], table[factor_col], alpha=alpha)
        table_html = tukey._results_table.as_html()
        tukey.plot_simultaneous()

        report.addMD('### {}'.format(col))
        report.addHTML(table_html)
        report.addPlt(plt)
        # Clear the current figure so the next column starts from a blank canvas.
        plt.clf()

    return {'result': {'_repr_brtc_': report.get()}}
def tukeys_range_test(table, response_cols, factor_col, alpha=0.05):
    """Validate ``alpha`` and report Tukey's range test per response column.

    Raises ``BrighticsFunctionException`` (code ``"0006"``) when ``alpha`` is
    below 0.001 or not below 0.9.

    Returns ``{'result': {'_repr_brtc_': <builder output>}}`` containing, for
    each column of ``response_cols``, a heading, the result table as HTML and
    the simultaneous-comparison plot.
    """
    alpha_out_of_range = alpha < 0.001 or alpha >= 0.9
    if alpha_out_of_range:
        raise BrighticsFunctionException("0006", ['alpha', 0.001, 0.9])

    builder = BrtcReprBuilder()
    builder.addMD("""## Tukey's range test Result""")

    for name in response_cols:
        observations = table[name]
        groups = table[factor_col]
        result = pairwise_tukeyhsd(observations, groups, alpha=alpha)
        result_html = result._results_table.as_html()
        result.plot_simultaneous()

        builder.addMD('### ' + '{}'.format(name))
        builder.addHTML(result_html)
        builder.addPlt(plt)
        # Reset the current figure before the next response column is drawn.
        plt.clf()

    payload = {'_repr_brtc_': builder.get()}
    return {'result': payload}
def _profile_table(table, bins=10, check_correlation=False, correlation_threshold=0.9,
                   correlation_overrides=None):
    """Validate the options, profile ``table`` and wrap the report HTML.

    ``bins`` is checked with ``greater_than_or_equal_to(bins, 1, ...)`` and
    ``correlation_threshold`` with ``greater_than(..., 0.0, ...)``; both checks
    are handed to ``validate`` before any profiling happens.  All options are
    then forwarded unchanged to ``pd_profiling.ProfileReport``.

    Returns ``{'result': {'_repr_brtc_': <builder output>}}``.
    """
    bins_check = greater_than_or_equal_to(bins, 1, 'bins')
    threshold_check = greater_than(correlation_threshold, 0.0, 'correlation_threshold')
    validate(bins_check, threshold_check)

    builder = BrtcReprBuilder()
    report = pd_profiling.ProfileReport(
        table,
        bins=bins,
        check_correlation=check_correlation,
        correlation_threshold=correlation_threshold,
        correlation_overrides=correlation_overrides,
    )
    builder.addHTML(report.html)

    return {'result': {'_repr_brtc_': builder.get()}}
def _lda4(table, input_col, topic_name='topic', num_voca=1000, num_topic=5, num_topic_word=10, max_iter=20,
          learning_method='online', learning_offset=10., random_state=None):
    """Fit a Latent Dirichlet Allocation model on ``table[input_col]``.

    Parameters
    ----------
    table : DataFrame holding the documents.
    input_col : name of the document column.  When the first document is a
        ``numpy.ndarray`` the documents are treated as token arrays and joined
        with spaces before counting.
    topic_name : name of the output column receiving the 1-based dominant
        topic; ``topic_name + '_distribution'`` receives the per-topic weights.
    num_voca : ``max_features`` of the ``CountVectorizer``.
    num_topic : number of LDA components.
    num_topic_word : number of top words listed per topic.
    max_iter, learning_method, learning_offset, random_state : forwarded to
        ``LatentDirichletAllocation`` (``learning_offset`` only for
        ``'online'``).

    Returns
    -------
    dict with ``out_table`` (deep copy of ``table`` plus the two topic
    columns), ``topic_table`` (top words and weights per topic) and ``model``.

    Raises
    ------
    BrighticsFunctionException
        If either output column name already exists in ``table``.
    """
    # Validate cheap preconditions before the expensive vectorize/fit steps.
    topic_dist_name = topic_name + '_distribution'
    if topic_name in table.columns or topic_dist_name in table.columns:
        raise BrighticsFunctionException.from_errors([{
            '0100': "Existing table contains Topic Column Name. Please choose again."
        }])
    if learning_method not in ('online', 'batch'):
        raise_runtime_error("Please check 'learning_method'.")

    # generate model
    corpus = np.array(table[input_col])
    vectorizer_options = {
        'stop_words': 'english',
        'max_df': 0.95,
        'min_df': 2,
        'max_features': num_voca,
    }
    # Guard the peek at the first document so an empty table does not fail
    # with IndexError here.
    if len(corpus) > 0 and isinstance(corpus[0], np.ndarray):
        vectorizer_options['preprocessor'] = ' '.join
    tf_vectorizer = CountVectorizer(**vectorizer_options)
    term_count = tf_vectorizer.fit_transform(corpus)

    # get_feature_names() is not available on newer scikit-learn releases;
    # prefer the replacement when the installed version provides it.
    if hasattr(tf_vectorizer, 'get_feature_names_out'):
        tf_feature_names = tf_vectorizer.get_feature_names_out()
    else:
        tf_feature_names = tf_vectorizer.get_feature_names()

    lda_options = {
        'n_components': num_topic,
        'max_iter': max_iter,
        'learning_method': learning_method,
        'random_state': random_state,
    }
    if learning_method == 'online':
        lda_options['learning_offset'] = learning_offset
    lda_model = LatentDirichletAllocation(**lda_options).fit(term_count)

    log_likelihood = lda_model.score(term_count)
    perplexity = lda_model.perplexity(term_count)

    # create topic table
    vocab_weights_list = []
    vocab_list = []
    weights_list = []
    topic_term_prob = normalize(lda_model.components_, norm='l1')
    for vector in topic_term_prob:
        # sorted() is stable, so ties keep vocabulary order exactly as before.
        ranked = sorted(
            ((abs(value), tf_feature_names[term_idx]) for term_idx, value in enumerate(vector)),
            key=lambda pair: pair[0],
            reverse=True)[:num_topic_word]
        vocab_weights_list.append(["{}: {}".format(term, weight) for weight, term in ranked])
        vocab_list.append([term for _, term in ranked])
        weights_list.append([weight for weight, _ in ranked])

    topic_table = pd.DataFrame({
        'vocabularies_weights': vocab_weights_list,
        'vocabularies': vocab_list,
        'weights': weights_list
    })
    topic_table['index'] = [idx + 1 for idx in topic_table.index]
    topic_table = topic_table[['index', 'vocabularies_weights', 'vocabularies', 'weights']]

    # create output table
    doc_topic = lda_model.transform(term_count)
    out_table = pd.DataFrame.copy(table, deep=True)
    out_table[topic_name] = doc_topic.argmax(axis=1) + 1  # 1-based topic id
    out_table[topic_dist_name] = doc_topic.tolist()

    # pyLDAvis
    prepared_data = ldavis.prepare(lda_model, term_count, tf_vectorizer)
    html_result = pyLDAvis.prepared_data_to_html(prepared_data)

    # generate report
    params = {
        'Input column': input_col,
        'Topic column name': topic_name,
        'Number of topics': num_topic,
        'Number of words for each topic': num_topic_word,
        'Maximum number of iterations': max_iter,
        'Learning method': learning_method,
        'Learning offset': learning_offset,
        'Seed': random_state
    }
    rb = BrtcReprBuilder()
    rb.addMD(strip_margin("""
    | ## Latent Dirichlet Allocation Result
    | ### Summary
    |
    """))
    rb.addHTML(html_result)
    rb.addMD(strip_margin("""
    |
    | ### Log Likelihood
    | {log_likelihood}
    |
    | ### Perplexity
    | {perplexity}
    |
    | ### Parameters
    | {params}
    """.format(log_likelihood=log_likelihood, perplexity=perplexity, params=dict2MD(params))))

    # create model
    model = _model_dict('lda_model')
    model['params'] = params
    model['lda_model'] = lda_model
    model['_repr_brtc_'] = rb.get()

    return {'out_table': out_table, 'topic_table': topic_table, 'model': model}
def _linear_regression_train(table, feature_cols, label_col, fit_intercept=True, is_vif=False, vif_threshold=10):
    """Fit an OLS regression with statsmodels and build a diagnostic report.

    Features come from ``check_col_type(table, feature_cols)``; when
    ``fit_intercept`` equals ``True`` a constant column is added.  With
    ``is_vif`` the coefficient table gains a ``VIF`` column and a
    ``VIF>{vif_threshold}`` flag column.

    Returns ``{'model': model}`` where ``model`` carries the fit statistics,
    the three summary tables, the report and the fitted statsmodels result
    (after ``remove_data()``).
    """
    _feature_names, design = check_col_type(table, feature_cols)
    target = table[label_col]

    # Both original branches fitted the same way; only the constant differs.
    if fit_intercept == True:
        design = sm.add_constant(design, has_constant='add')
    ols_fit = sm.OLS(target, design).fit()

    fitted = ols_fit.predict(design)
    resid = target - fitted

    ols_summary = ols_fit.summary()
    tables_as_df = simple_tables2df_list(ols_summary.tables, drop_index=True)
    summary0 = tables_as_df[0]
    summary1 = tables_as_df[1]

    if type(design) != type(table):
        design = pd.DataFrame(design)

    if is_vif:
        column_count = design.shape[1]
        summary1['VIF'] = [variance_inflation_factor(design.values, idx) for idx in range(column_count)]
        flag_column = 'VIF>{}'.format(vif_threshold)
        summary1[flag_column] = summary1['VIF'].apply(
            lambda vif: 'true' if vif > vif_threshold else 'false')
        ols_summary.tables[1] = _df_to_simpletable(summary1)

    summary2 = tables_as_df[2]
    html_result = ols_summary.as_html()

    # Predicted vs actual, with the identity line as reference.
    plt.figure()
    plt.scatter(fitted, target)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Actual values for ' + label_col)
    low = np.min(fitted)
    high = np.max(fitted)
    plt.plot([low, high], [low, high], 'r--')
    fig_actual_predict = plt2MD(plt)

    # Residuals vs predicted.
    plt.figure()
    plt.scatter(fitted, resid)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Residuals')
    plt.axhline(y=0, color='r', linestyle='--')
    fig_residual_1 = plt2MD(plt)

    # Q-Q plot of the residuals.
    plt.figure()
    sm.qqplot(resid, line='s')
    plt.ylabel('Residuals')
    fig_residual_2 = plt2MD(plt)

    # Residual distribution.
    plt.figure()
    sns.distplot(resid)
    plt.xlabel('Residuals')
    fig_residual_3 = plt2MD(plt)

    header_md = strip_margin("""
    | ## Linear Regression Result
    | ### Summary
    |
    """)
    figures_md = strip_margin("""
    |
    | ### Predicted vs Actual
    | {image1}
    |
    | ### Fit Diagnostics
    | {image2}
    | {image3}
    | {image4}
    """.format(image1=fig_actual_predict,
               image2=fig_residual_1,
               image3=fig_residual_2,
               image4=fig_residual_3))

    rb = BrtcReprBuilder()
    rb.addMD(header_md)
    rb.addHTML(html_result)
    rb.addMD(figures_md)

    model = _model_dict('linear_regression_model')
    entries = (
        ('features', feature_cols),
        ('label', label_col),
        ('coefficients', ols_fit.params),
        ('fit_intercept', fit_intercept),
        ('r2', ols_fit.rsquared),
        ('adjusted_r2', ols_fit.rsquared_adj),
        ('aic', ols_fit.aic),
        ('bic', ols_fit.bic),
        ('f_static', ols_fit.fvalue),
        ('tvalues', ols_fit.tvalues),
        ('pvalues', ols_fit.pvalues),
        ('_repr_brtc_', rb.get()),
        ('summary0', summary0),
        ('summary1', summary1),
        ('summary2', summary2),
    )
    for key, value in entries:
        model[key] = value

    # Statistics above are read before the training data is dropped from the
    # fitted result that gets stored in the model.
    ols_fit.remove_data()
    model['lr_model'] = ols_fit

    return {'model': model}
def _glm_train(table, feature_cols, label_col, family="Gaussian", link="ident", fit_intercept=True):
    """Fit a statsmodels GLM and return a model dict with its summary report.

    Parameters
    ----------
    table : DataFrame with the training data.
    feature_cols : list of feature column names.
    label_col : name of the response column; must not be in ``feature_cols``.
    family : one of ``"Gaussian"``, ``"inv_Gaussian"``, ``"binomial"``,
        ``"Poisson"``, ``"neg_binomial"``, ``"gamma"``, ``"Tweedie"``.
    link : one of ``"ident"``, ``"log"``, ``"logit"``, ``"probit"``,
        ``"cloglog"``, ``"pow"``, ``"nbinom"``.
    fit_intercept : when equal to ``True`` a constant column is added.

    Returns
    -------
    ``{'model': model}`` with the fit statistics, the fitted GLM result and
    the report.

    Raises
    ------
    Whatever ``raise_runtime_error`` raises, when ``label_col`` is also a
    feature or when ``family`` / ``link`` is not one of the supported names
    (previously an unknown name surfaced later as a ``NameError``).
    """
    features = table[feature_cols]
    label = table[label_col]

    if label_col in feature_cols:
        raise_runtime_error("%s is duplicated." % label_col)

    # Option name -> attribute name.  Attributes are resolved lazily with
    # getattr so that only the selected family/link has to exist.
    family_attr = {
        "Gaussian": "Gaussian",
        "inv_Gaussian": "InverseGaussian",
        "binomial": "Binomial",
        "Poisson": "Poisson",
        "neg_binomial": "NegativeBinomial",
        "gamma": "Gamma",
        "Tweedie": "Tweedie",
    }
    link_attr = {
        "ident": "identity",
        "log": "log",
        "logit": "logit",
        "probit": "probit",
        # The original referenced `links.cLogLog` and `links.binom`; the
        # statsmodels link names are `cloglog` and `nbinom`.
        "cloglog": "cloglog",
        "pow": "Power",
        "nbinom": "nbinom",
    }

    if family not in family_attr:
        raise_runtime_error("Please check 'family': %s" % family)
    if link not in link_attr:
        raise_runtime_error("Please check 'link': %s" % link)

    sm_family = getattr(sm.families, family_attr[family])()
    sm_link = getattr(sm.families.links, link_attr[link])

    # NOTE(review): `link` is handed to GLM itself, not to the family
    # constructor, which is where statsmodels documents the link argument.
    # It may therefore have no effect on the fit -- confirm.  Left unchanged
    # because moving it would alter fitted results for existing callers.
    if fit_intercept == True:
        exog = sm.add_constant(features)
    else:
        exog = features
    glm_model = sm.GLM(label, exog, family=sm_family, link=sm_link).fit()

    summary = glm_model.summary().as_html()

    rb = BrtcReprBuilder()
    rb.addMD(strip_margin("""
    | ## GLM Result
    | ### Summary
    |
    """))
    rb.addHTML(summary)

    model = _model_dict('glm_model')
    model['features'] = feature_cols
    model['label'] = label_col
    model['family'] = family
    model['link'] = link
    model['coefficients'] = glm_model.params
    model['aic'] = glm_model.aic
    model['bic'] = glm_model.bic
    model['tvalues'] = glm_model.tvalues
    model['pvalues'] = glm_model.pvalues
    model['fit_intercept'] = fit_intercept
    model['glm_model'] = glm_model
    model['_repr_brtc_'] = rb.get()

    return {'model': model}
def _dtm(table, input_col, topic_name='topic', num_topic=5, num_topic_word=10, max_iter=20, time_slice=None,
         coherence='u_mass', vis_time=0, seed=None):
    """Train a Dynamic Topic Model through the bundled ``dtm`` executable.

    Parameters
    ----------
    table : DataFrame holding the tokenized documents.
    input_col : name of the tokenized-document column.
    topic_name : output column for the 1-based dominant topic.
    num_topic : number of topics.
    num_topic_word : number of words shown per topic and time slice.
    max_iter : passed as ``lda_sequence_max_iter``.
    time_slice : list with the number of documents per period; defaults to a
        single slice containing every document.  Must sum to the row count.
    coherence : ``'u_mass'`` selects the u_mass measure; any other value
        selects ``'c_v'``.
    vis_time : index of the time slice to visualize.
    seed : optional ``rng_seed`` for the model.

    Returns
    -------
    dict with ``out_table``, ``topic_table`` and ``model``.

    Raises
    ------
    BrighticsFunctionException
        If ``topic_name`` already exists in ``table``.
    Whatever ``raise_runtime_error`` raises, for an inconsistent
    ``time_slice`` or an out-of-range ``vis_time``.
    """
    # Fail fast: reject a clashing column name before training the model.
    if topic_name in table.columns:
        raise BrighticsFunctionException.from_errors(
            [{'0100': "Existing table contains Topic Column Name. Please choose again."}])

    tokenized_doc = np.array(table[input_col])
    num_doc = len(tokenized_doc)
    if time_slice is None:
        time_slice = [num_doc]
    elif sum(time_slice) != num_doc:
        raise_runtime_error("The sum of time slice list does not match the number of documents.")
    if vis_time < 0 or vis_time >= len(time_slice):
        raise_runtime_error("Invalid time parameter: {}".format(vis_time))

    # Pick the executable matching the current platform.
    running_os = platform.system()
    is_os_64bit = platform.machine().endswith('64')
    if running_os == 'Linux':
        dtm_filename = 'dtm-linux64' if is_os_64bit else 'dtm-linux32'
    elif running_os == 'Windows':
        dtm_filename = 'dtm-win64.exe' if is_os_64bit else 'dtm-win32.exe'
    else:  # Mac
        dtm_filename = 'dtm-darwin64'
    dtm_path = os.path.join(str(pathlib.Path(__file__).parent.absolute()), 'dtm', dtm_filename)

    # Make the binary executable without going through a shell: a shell
    # command string breaks on paths with spaces or metacharacters.  The mode
    # is only touched when the file is not already executable.
    if running_os != 'Windows' and not os.access(dtm_path, os.X_OK):
        os.chmod(dtm_path, os.stat(dtm_path).st_mode | 0o111)

    dictionary = corpora.Dictionary(tokenized_doc)
    corpus = [dictionary.doc2bow(text) for text in tokenized_doc]
    dtm_params = {"corpus": corpus,
                  "id2word": dictionary,
                  "time_slices": time_slice,
                  "num_topics": num_topic,
                  "lda_sequence_max_iter": max_iter,
                  "model": 'dtm'}
    if seed is not None:
        dtm_params["rng_seed"] = seed
    dtm_model = DtmModel(dtm_path, **dtm_params)

    num_periods = len(time_slice)

    # One row per time slice, one cell per topic; each cell lists the top
    # entries formatted from the (first, second) tuples of show_topic as
    # "second: first".
    topic_time = [
        [
            ["{}: {}".format(tup[1], tup[0])
             for tup in dtm_model.show_topic(topicid=topic_id, time=period, topn=num_topic_word)]
            for topic_id in range(num_topic)
        ]
        for period in range(num_periods)
    ]
    timeline = ["{} ({} docs)".format(ind, t) for ind, t in enumerate(time_slice)]
    columns = ["topic_{}".format(i + 1) for i in range(num_topic)]
    topic_table = pd.DataFrame(topic_time, columns=columns)
    topic_table['time'] = timeline
    topic_table = topic_table[['time'] + columns]

    prop_arr = dtm_model.gamma_
    out_table = pd.DataFrame.copy(table, deep=True)
    out_table[topic_name] = [item.argmax() + 1 for item in prop_arr]
    # NOTE: this column name is fixed; a column of the same name in the input
    # is replaced in the output copy.
    out_table['topic_distribution'] = prop_arr.tolist()

    coherence_topic_arr = [dtm_model.dtm_coherence(period) for period in range(num_periods)]
    if coherence == 'u_mass':
        coh_arr = [CoherenceModel(topics=item, dictionary=dictionary, corpus=corpus,
                                  coherence='u_mass').get_coherence()
                   for item in coherence_topic_arr]
    else:
        coh_arr = [CoherenceModel(topics=item, dictionary=dictionary, corpus=corpus, texts=tokenized_doc,
                                  coherence='c_v').get_coherence()
                   for item in coherence_topic_arr]

    doc_topic, topic_term, doc_lengths, term_frequency, vocab = dtm_model.dtm_vis(corpus, vis_time)
    prepared_data = plv.prepare(topic_term, doc_topic, doc_lengths, vocab, term_frequency, sort_topics=False)
    html_result = plv.prepared_data_to_html(prepared_data)

    params = {'Input column': input_col,
              'Topic column name': topic_name,
              'Number of topics': num_topic,
              'Number of words for each topic': num_topic_word,
              'Maximum number of iterations': max_iter,
              'Time slice': time_slice,
              'Coherence measure': coherence,
              'Time to visualize': vis_time}

    rb = BrtcReprBuilder()
    rb.addMD(strip_margin("""
    | ## Dynamic Topic Modeling Result
    | ### Summary
    |
    """))
    rb.addHTML(html_result)
    rb.addMD(strip_margin("""
    | ### Coherence for each period
    | {coh_arr}
    |
    | ### Parameters
    | {params}
    """.format(coh_arr=coh_arr, params=dict2MD(params))))

    model = _model_dict('dtm_model')
    model['params'] = params
    model['dtm_model'] = dtm_model
    model['coherences'] = coh_arr
    model['corpus'] = corpus
    model['_repr_brtc_'] = rb.get()

    return {'out_table': out_table, 'topic_table': topic_table, 'model': model}
def _gsdmm(table, input_col, topic_name='topic', K=10, alpha=0.1, beta=0.1, max_iter=50, num_topic_words=3):
    """Cluster documents with ``gsdmm_rwalk.MovieGroupProcess`` and report the topics.

    Parameters
    ----------
    table : DataFrame holding the documents.
    input_col : name of the column with token sequences; each document is
        reduced to its set of distinct tokens before training.
    topic_name : output column receiving the 1-based topic id.
    K, alpha, beta : forwarded to ``MovieGroupProcess``.
    max_iter : forwarded as ``n_iters``.
    num_topic_words : number of words listed per topic.

    Returns
    -------
    dict with ``out_table`` (deep copy of ``table`` plus the topic column),
    ``topic_table`` and ``model``.  Only non-empty clusters are kept, and
    they are renumbered consecutively starting at 1.

    Raises
    ------
    BrighticsFunctionException
        If ``topic_name`` already exists in ``table``.
    """
    # Fail fast: reject a clashing column name before the training run.
    if topic_name in table.columns:
        raise BrighticsFunctionException.from_errors([{
            '0100': "Existing table contains the topic column name. Please choose another name."
        }])

    docs = np.array(table[input_col])
    docs_set = [set(doc) for doc in docs]
    docs_preprocessed = [list(doc_set) for doc_set in docs_set]
    vocab_set = list(set.union(*docs_set))
    vocab_size = len(vocab_set)

    # initialize and train a GSDMM model
    mgp = gsdmm_rwalk.MovieGroupProcess(K=K, alpha=alpha, beta=beta, n_iters=max_iter)
    topics = mgp.fit(docs_preprocessed, vocab_size)

    # generate topic table (empty clusters are dropped)
    topic_word_count = mgp.cluster_word_distribution
    topic_words_raw = [[ind, _count_to_ratio_raw(word_count)]
                       for ind, word_count in enumerate(topic_word_count) if word_count]
    topic_words = [[item[0]] + _gen_table(item[1], num_topic_words) for item in topic_words_raw]

    # reset topic ids so that the surviving clusters are numbered 1..n
    nonempty_topic_indices = [item[0] for item in topic_words]
    reset_topic_ind = {old_ind: (new_ind + 1) for new_ind, old_ind in enumerate(nonempty_topic_indices)}
    topics = [reset_topic_ind[old_ind] for old_ind in topics]
    topic_words = [[reset_topic_ind[old_item[0]]] + old_item[1:] for old_item in topic_words]

    # generate output dataframes
    out_table = pd.DataFrame.copy(table, deep=True)
    out_table[topic_name] = topics
    columns = ['index', 'vocabularies_weights', 'vocabularies', 'weights']
    topic_table = pd.DataFrame(topic_words, columns=columns)
    topic_table['weights'] = topic_table['weights'].apply(pd.to_numeric)

    # pyLDAvis (skipped when only one topic survives)
    if len(topic_words) == 1:
        html_result = None
    else:
        topic_words_dicts = [item[1] for item in topic_words_raw]
        topic_term_dists = [[topic_words_dict.get(word, 0) for word in vocab_set]
                            for topic_words_dict in topic_words_dicts]
        num_docs = len(topics)
        num_topics = len(topic_words_raw)
        doc_topic_dists = np.zeros((num_docs, num_topics))
        for doc_id, topic_id in enumerate(topics):
            doc_topic_dists[doc_id][topic_id - 1] = 1.0
        doc_lengths = [len(doc) for doc in docs_preprocessed]

        # Sum the per-cluster word counts in one pass instead of rebuilding a
        # merged dict for every cluster.
        vocab_count = {}
        for word_count in topic_word_count:
            for word, count in word_count.items():
                vocab_count[word] = vocab_count.get(word, 0) + count
        term_frequency = [vocab_count.get(word) for word in vocab_set]

        prepared_data = pyLDAvis.prepare(topic_term_dists, doc_topic_dists, doc_lengths, vocab_set,
                                         term_frequency)
        html_result = pyLDAvis.prepared_data_to_html(prepared_data)

    # generate report
    params = {
        'Input column': input_col,
        'Topic column name': topic_name,
        'K': K,
        'Alpha': alpha,
        'Beta': beta,
        'Maximum number of iterations': max_iter,
        'Number of words for each topic': num_topic_words
    }
    rb = BrtcReprBuilder()
    rb.addMD(strip_margin("""
    | ## GSDMM Result
    | ### Summary
    |
    """))
    if html_result is not None:
        rb.addHTML(html_result)
        rb.addMD(strip_margin("""
        |
        """))
    rb.addMD(strip_margin("""
    | ### Final Number of Topics
    | {num_topics}
    |
    | ### Parameters
    | {params}
    """.format(num_topics=len(topic_words_raw), params=dict2MD(params))))

    # create model
    # NOTE(review): the model is tagged 'lda_model' although it stores a
    # GSDMM model -- confirm that downstream consumers rely on this tag.
    model = _model_dict('lda_model')
    model['params'] = params
    model['gsdmm_model'] = mgp
    model['_repr_brtc_'] = rb.get()

    return {'out_table': out_table, 'topic_table': topic_table, 'model': model}
def _linear_regression_train(table, feature_cols, label_col, fit_intercept=True, is_vif=True, vif_threshold=10):
    """Fit a linear regression and build a summary/diagnostics report.

    A scikit-learn ``LinearRegression`` provides the predictions and is stored
    as ``model['lr_model']``; a statsmodels OLS fit on the same data provides
    the summary tables and statistics.

    Parameters
    ----------
    table : DataFrame with the training data.
    feature_cols : list of feature column names.
    label_col : name of the response column.
    fit_intercept : whether to fit an intercept; for the OLS fit a constant
        column is added when this equals ``True``.
    is_vif : when true, add ``VIF`` and ``VIF>{vif_threshold}`` columns to the
        coefficient table.
    vif_threshold : threshold used for the VIF flag column.

    Returns
    -------
    ``{'model': model}``.
    """
    features = table[feature_cols]
    label = table[label_col]

    # Keyword form: newer scikit-learn releases reject positional arguments here.
    lr_model = LinearRegression(fit_intercept=fit_intercept)
    lr_model.fit(features, label)

    predict = lr_model.predict(features)
    residual = label - predict

    if fit_intercept == True:
        features = sm.add_constant(features)
    lr_model_fit = sm.OLS(label, features).fit()

    summary = lr_model_fit.summary()
    summary_tables = simple_tables2df_list(summary.tables, drop_index=True)
    summary0 = summary_tables[0]
    summary1 = summary_tables[1]
    if is_vif:
        summary1['VIF'] = [variance_inflation_factor(features.values, i) for i in range(features.shape[1])]
        summary1['VIF>{}'.format(vif_threshold)] = summary1['VIF'].apply(
            lambda vif: 'true' if vif > vif_threshold else 'false')
        summary.tables[1] = _df_to_simpletable(summary1)
    summary2 = summary_tables[2]
    html_result = summary.as_html()

    plt.figure()
    plt.scatter(predict, label)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Actual values for ' + label_col)

    # Least-squares line of actual on predicted values (normal equations).
    # The previous hand-written loop stopped at x.size - 1 and silently
    # dropped the last sample from the sum of x*y.
    x = predict
    y = np.array(label)
    n = x.size
    sum_x = np.sum(x)
    sum_y = np.sum(y)
    sum_xx = np.sum(x * x)
    sum_xy = np.sum(x * y)
    det = n * sum_xx - sum_x * sum_x
    if det != 0:  # all predictions equal -> no unique line to draw
        intercept = (sum_xx * sum_y - sum_x * sum_xy) / det
        slope = (n * sum_xy - sum_x * sum_y) / det
        p1x = np.min(x)
        p2x = np.max(x)
        plt.plot([p1x, p2x], [intercept + slope * p1x, intercept + slope * p2x], 'r--')
    fig_actual_predict = plt2MD(plt)

    plt.figure()
    plt.scatter(predict, residual)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Residuals')
    plt.axhline(y=0, color='r', linestyle='--')
    fig_residual_1 = plt2MD(plt)

    plt.figure()
    sm.qqplot(residual, line='s')
    plt.ylabel('Residuals')
    fig_residual_2 = plt2MD(plt)

    plt.figure()
    sns.distplot(residual)
    plt.xlabel('Residuals')
    fig_residual_3 = plt2MD(plt)

    rb = BrtcReprBuilder()
    rb.addMD(strip_margin("""
    | ## Linear Regression Result
    | ### Summary
    |
    """))
    rb.addHTML(html_result)
    rb.addMD(strip_margin("""
    |
    | ### Predicted vs Actual
    | {image1}
    |
    | ### Fit Diagnostics
    | {image2}
    | {image3}
    | {image4}
    """.format(image1=fig_actual_predict,
               image2=fig_residual_1,
               image3=fig_residual_2,
               image4=fig_residual_3)))

    model = _model_dict('linear_regression_model')
    model['features'] = feature_cols
    model['label'] = label_col
    model['coefficients'] = lr_model_fit.params
    model['r2'] = lr_model_fit.rsquared
    model['adjusted_r2'] = lr_model_fit.rsquared_adj
    model['aic'] = lr_model_fit.aic
    model['bic'] = lr_model_fit.bic
    model['f_static'] = lr_model_fit.fvalue
    model['tvalues'] = lr_model_fit.tvalues
    model['pvalues'] = lr_model_fit.pvalues
    model['lr_model'] = lr_model
    model['_repr_brtc_'] = rb.get()
    model['summary0'] = summary0
    model['summary1'] = summary1
    model['summary2'] = summary2

    return {'model': model}
def _linear_regression_train(table, feature_cols, label_col, fit_intercept=True):
    """Fit a linear regression and build a summary/diagnostics report.

    A scikit-learn ``LinearRegression`` provides the predictions and is stored
    as ``model['lr_model']``; a statsmodels OLS fit on the same data provides
    the HTML summary and the statistics stored in the model dict.

    Parameters
    ----------
    table : DataFrame with the training data.
    feature_cols : list of feature column names.
    label_col : name of the response column.
    fit_intercept : whether to fit an intercept; for the OLS fit a constant
        column is added when this equals ``True``.

    Returns
    -------
    ``{'model': model}``.
    """
    features = table[feature_cols]
    label = table[label_col]

    # Keyword form: newer scikit-learn releases reject positional arguments here.
    lr_model = LinearRegression(fit_intercept=fit_intercept)
    lr_model.fit(features, label)

    predict = lr_model.predict(features)
    residual = label - predict

    if fit_intercept == True:
        ols_features = sm.add_constant(features)
    else:
        ols_features = features
    lr_model_fit = sm.OLS(label, ols_features).fit()
    summary = lr_model_fit.summary().as_html()

    plt.figure()
    plt.scatter(predict, label)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Actual values for ' + label_col)

    # Least-squares line of actual on predicted values (normal equations).
    # The previous hand-written loop stopped at x.size - 1 and silently
    # dropped the last sample from the sum of x*y.
    x = predict
    y = np.array(label)
    n = x.size
    sum_x = np.sum(x)
    sum_y = np.sum(y)
    sum_xx = np.sum(x * x)
    sum_xy = np.sum(x * y)
    det = n * sum_xx - sum_x * sum_x
    if det != 0:  # all predictions equal -> no unique line to draw
        intercept = (sum_xx * sum_y - sum_x * sum_xy) / det
        slope = (n * sum_xy - sum_x * sum_y) / det
        p1x = np.min(x)
        p2x = np.max(x)
        plt.plot([p1x, p2x], [intercept + slope * p1x, intercept + slope * p2x], 'r--')
    fig_actual_predict = plt2MD(plt)

    plt.figure()
    plt.scatter(predict, residual)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Residuals')
    plt.axhline(y=0, color='r', linestyle='--')
    fig_residual_1 = plt2MD(plt)

    plt.figure()
    sm.qqplot(residual, line='s')
    plt.ylabel('Residuals')
    fig_residual_2 = plt2MD(plt)

    plt.figure()
    sns.distplot(residual)
    plt.xlabel('Residuals')
    fig_residual_3 = plt2MD(plt)

    rb = BrtcReprBuilder()
    rb.addMD(strip_margin("""
    | ## Linear Regression Result
    | ### Summary
    |
    """))
    rb.addHTML(summary)
    rb.addMD(strip_margin("""
    |
    | ### Predicted vs Actual
    | {image1}
    |
    | ### Fit Diagnostics
    | {image2}
    | {image3}
    | {image4}
    """.format(image1=fig_actual_predict,
               image2=fig_residual_1,
               image3=fig_residual_2,
               image4=fig_residual_3)))

    model = _model_dict('linear_regression_model')
    model['features'] = feature_cols
    model['label'] = label_col
    model['coefficients'] = lr_model_fit.params
    model['r2'] = lr_model_fit.rsquared
    model['adjusted_r2'] = lr_model_fit.rsquared_adj
    model['aic'] = lr_model_fit.aic
    model['bic'] = lr_model_fit.bic
    model['f_static'] = lr_model_fit.fvalue
    model['tvalues'] = lr_model_fit.tvalues
    model['pvalues'] = lr_model_fit.pvalues
    model['lr_model'] = lr_model
    model['_repr_brtc_'] = rb.get()

    return {'model': model}