def partition_info_table(viomet_df, date_range, partition_infos):
    """Build the per-network partition summary table (first paper table).

    For each cable network, report the two partition dates, the ground- and
    excited-state metaphor frequencies, the reactivity
    ``(f_excited - f_ground) / f_ground``, and the total metaphor count over
    ``date_range``.

    Parameters
    ----------
    viomet_df : pandas.DataFrame
        Annotated metaphor instances.
    date_range : pandas.DatetimeIndex
        Dates over which daily counts are aggregated.
    partition_infos : dict
        Maps network id (e.g. ``'MSNBCW'``) to an object exposing
        ``partition_date_1``, ``partition_date_2``, ``f_ground`` and
        ``f_excited``.

    Returns
    -------
    pandas.DataFrame
        Indexed by display network name, one column per summary statistic.
    """
    # (display name, network id) pairs; ids key into partition_infos/counts.
    network_pairs = [
        ('MSNBC', 'MSNBCW'),
        ('CNN', 'CNNW'),
        ('Fox News', 'FOXNEWSW'),
    ]
    columns = [
        '$t_0^{(2)}$', '$t^{(2)}_{N^{(2)}}$',
        '$f^{(1)}$', '$f^{(2)}$', 'reactivity', 'total uses'
    ]

    counts = daily_metaphor_counts(viomet_df, date_range, by=['network'])

    rows = []
    for _, network_id in network_pairs:
        info = partition_infos[network_id]
        rows.append([
            info.partition_date_1,
            info.partition_date_2,
            info.f_ground,
            info.f_excited,
            # Relative change from ground to excited frequency.
            (info.f_excited - info.f_ground) / info.f_ground,
            counts[network_id].sum(),
        ])

    return pd.DataFrame(
        data=rows,
        index=[display for display, _ in network_pairs],
        columns=columns,
    )
def by_network_word_table(project_df, date_range, partition_infos,
                          words=('hit', 'beat', 'attack')):
    """Build the per-word, per-network frequency table (second paper table).

    For each violent facet word and each network, report the ground-state
    frequency, excited-state frequency, reactivity
    ``(f_excited - f_ground) / f_ground``, and total uses.

    Parameters
    ----------
    project_df : pandas.DataFrame
        Annotated metaphor instances; only rows with ``include == True``
        are counted.
    date_range : pandas.DatetimeIndex
        Dates over which daily counts are aggregated.
    partition_infos : dict
        Maps network id to partition info used by ``_get_ground`` /
        ``_get_excited``.
    words : iterable of str, optional
        Violent facet words to tabulate.  Default was previously a mutable
        list, which is a Python anti-pattern; an immutable tuple is used now
        (backward-compatible — callers may still pass lists).

    Returns
    -------
    pandas.DataFrame
        MultiIndexed by (Violent Word, Network).
    """
    # Normalize to a list so downstream helpers see the same type as before.
    words = list(words)

    networks = ['MSNBC', 'CNN', 'Fox News']
    columns = ['fg', 'fe', 'reactivity', 'total uses']

    index_tuples = [(word, net) for word in words for net in networks]
    index = pd.MultiIndex.from_tuples(
        index_tuples, names=['Violent Word', 'Network']
    )

    df = pd.DataFrame(index=index, columns=columns, data=0.0)

    # Only rows flagged for inclusion contribute to counts.
    project_df = project_df[project_df.include]
    counts_df = daily_metaphor_counts(
        project_df, date_range, by=['network', 'facet_word']
    )

    for idx, netid in enumerate(['MSNBCW', 'CNNW', 'FOXNEWSW']):
        sum_g, n_g = _get_ground(
            counts_df, netid, partition_infos, words=words
        )
        sum_e, n_e = _get_excited(
            counts_df, netid, partition_infos, words=words
        )

        freq_g = sum_g / n_g
        freq_e = sum_e / n_e
        reactivity = ((freq_e - freq_g) / freq_g)
        totals = sum_g + sum_e

        network = networks[idx]
        for word in words:
            df.loc[word, network] = [
                freq_g[word], freq_e[word], reactivity[word], totals[word]
            ]

    fancy_columns = ['$f^{(1)}$', '$f^{(2)}$', 'reactivity', 'total uses']
    df.columns = fancy_columns

    return df
def by_network_subj_obj_table(viomet_df, date_range, partition_infos,
                              subjects=('Barack Obama', 'Mitt Romney'),
                              objects=('Barack Obama', 'Mitt Romney')):
    """Build the per-subject/object, per-network frequency table.

    For each metaphor subject and object of interest and each network,
    report the ground-state frequency, excited-state frequency, a
    reactivity measure, and total uses.

    Parameters
    ----------
    viomet_df : pandas.DataFrame
        Annotated metaphor instances with ``subjects`` and ``objects``
        columns.
    date_range : pandas.DatetimeIndex
        Dates over which daily counts are aggregated.
    partition_infos : dict
        Maps network id to partition info used by ``_get_ground`` /
        ``_get_excited``.
    subjects, objects : iterable of str, optional
        Subject/object names to tabulate.  Defaults were previously mutable
        lists (an anti-pattern); immutable tuples are used now,
        backward-compatibly.

    Returns
    -------
    pandas.DataFrame
        MultiIndexed by (Subject/Object, Network).
    """
    subjects = list(subjects)
    objects = list(objects)

    networks = ['MSNBC', 'CNN', 'Fox News']
    columns = ['fg', 'fe', 'reactivity', 'total uses']

    subj_objs = (
        ['Subject=' + subj for subj in subjects]
        + ['Object=' + obj for obj in objects]
    )
    index_tuples = [(so, net) for so in subj_objs for net in networks]
    index = pd.MultiIndex.from_tuples(
        index_tuples, names=['Subject/Object', 'Network']
    )

    df = pd.DataFrame(index=index, columns=columns, data=0.0)

    # BUG FIX: the original reduce() over pairwise equality comparisons was
    # only correct for exactly two subjects/objects — with three or more it
    # compared a boolean Series against a string.  ``Series.isin`` selects
    # matching rows correctly for any number of values and is equivalent to
    # the old behavior for the two-element defaults.
    subject_df = viomet_df[viomet_df.subjects.isin(subjects)]
    object_df = viomet_df[viomet_df.objects.isin(objects)]

    subject_counts_df = daily_metaphor_counts(
        subject_df, date_range, by=['network', 'subjects'],
    )
    object_counts_df = daily_metaphor_counts(
        object_df, date_range, by=['network', 'objects']
    )

    for idx, network_id in enumerate(['MSNBCW', 'CNNW', 'FOXNEWSW']):
        # Ground state data.
        sum_subj_g, n_subj_g = _get_ground(
            subject_counts_df, network_id, partition_infos
        )
        sum_obj_g, n_obj_g = _get_ground(
            object_counts_df, network_id, partition_infos
        )

        # Excited state data.
        sum_subj_e, n_subj_e = _get_excited(
            subject_counts_df, network_id, partition_infos
        )
        sum_obj_e, n_obj_e = _get_excited(
            object_counts_df, network_id, partition_infos
        )

        freq_subj_g = sum_subj_g / n_subj_g
        freq_obj_g = sum_obj_g / n_obj_g
        freq_subj_e = sum_subj_e / n_subj_e
        freq_obj_e = sum_obj_e / n_obj_e

        # NOTE(review): this divides by 2.0, unlike the sibling tables which
        # use (fe - fg) / fg for 'reactivity'.  Preserved as-is since it may
        # be intentional (a half-difference), but worth confirming.
        reactivity_diff_subj = ((freq_subj_e - freq_subj_g) / 2.0)
        reactivity_diff_obj = ((freq_obj_e - freq_obj_g) / 2.0)

        totals_subj = sum_subj_g + sum_subj_e
        totals_obj = sum_obj_g + sum_obj_e

        network = networks[idx]
        for subject in subjects:
            df.loc['Subject=' + subject, network] = [
                freq_subj_g[subject],
                freq_subj_e[subject],
                reactivity_diff_subj[subject],
                totals_subj[subject]
            ]
        for object_ in objects:
            df.loc['Object=' + object_, network] = [
                freq_obj_g[object_],
                freq_obj_e[object_],
                reactivity_diff_obj[object_],
                totals_obj[object_]
            ]

    fancy_columns = ['$f^{(1)}$', '$f^{(2)}$', 'reactivity', 'total uses']
    df.columns = fancy_columns

    return df