Example #1
import pandas as pd


def partition_info_table(viomet_df, date_range, partition_infos):
    '''
    Summarize each network's fitted partition: the two partition dates,
    the ground and excited metaphor frequencies, the reactivity
    (relative change in frequency), and the total number of uses.

    daily_metaphor_counts is a helper defined elsewhere in this module.
    '''
    index_keys = [('MSNBC', 'MSNBCW'), ('CNN', 'CNNW'),
                  ('Fox News', 'FOXNEWSW')]

    columns = [
        '$t_0^{(2)}$', '$t^{(2)}_{N^{(2)}}$', '$f^{(1)}$', '$f^{(2)}$',
        'reactivity', 'total uses'
    ]

    counts_df = daily_metaphor_counts(viomet_df, date_range, by=['network'])

    data = []
    for name, key in index_keys:
        pi = partition_infos[key]
        data.append([
            pi.partition_date_1, pi.partition_date_2,
            pi.f_ground, pi.f_excited,
            (pi.f_excited - pi.f_ground) / pi.f_ground,  # reactivity
            counts_df[key].sum()
        ])

    index = [name for name, _ in index_keys]

    return pd.DataFrame(data=data, index=index, columns=columns)
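
A minimal usage sketch, assuming viomet_df and partition_infos were produced by earlier steps of the pipeline; the date window below is a hypothetical 2012 general-election range, not taken from the source:

# Hypothetical inputs: viomet_df and partition_infos come from upstream.
date_range = pd.date_range('2012-09-01', '2012-11-30', freq='D')
table = partition_info_table(viomet_df, date_range, partition_infos)
# escape=False keeps LaTeX column headers like $f^{(1)}$ intact.
print(table.to_latex(escape=False))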
Example #2
import pandas as pd


def by_network_word_table(project_df,
                          date_range,
                          partition_infos,
                          words=['hit', 'beat', 'attack']):
    '''
    Second table in the paper: ground and excited frequencies,
    reactivity, and total uses for each violent word, by network.
    '''
    networks = ['MSNBC', 'CNN', 'Fox News']
    columns = ['fg', 'fe', 'reactivity', 'total uses']

    # Group rows by violent word first, then by network.
    index_tuples = [(word, net) for word in words for net in networks]

    index = pd.MultiIndex.from_tuples(index_tuples,
                                      names=['Violent Word', 'Network'])

    df = pd.DataFrame(index=index, columns=columns, data=0.0)

    # Keep only the rows flagged for inclusion in the analysis.
    project_df = project_df[project_df.include]
    counts_df = daily_metaphor_counts(project_df,
                                      date_range,
                                      by=['network', 'facet_word'])

    for idx, netid in enumerate(['MSNBCW', 'CNNW', 'FOXNEWSW']):

        sum_g, n_g = _get_ground(counts_df,
                                 netid,
                                 partition_infos,
                                 words=words)
        sum_e, n_e = _get_excited(counts_df,
                                  netid,
                                  partition_infos,
                                  words=words)

        freq_g = sum_g / n_g
        freq_e = sum_e / n_e

        reactivity = (freq_e - freq_g) / freq_g

        totals = sum_g + sum_e

        network = networks[idx]
        for word in words:
            df.loc[word, network] = [
                freq_g[word], freq_e[word], reactivity[word], totals[word]
            ]

    fancy_columns = ['$f^{(1)}$', '$f^{(2)}$', 'reactivity', 'total uses']
    df.columns = fancy_columns

    return df
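
Because the result carries a (Violent Word, Network) MultiIndex, a single .loc lookup pulls one word's rows for all three networks. A sketch under the same assumptions as above:

word_table = by_network_word_table(project_df, date_range, partition_infos,
                                   words=['hit', 'beat', 'attack'])
# Ground/excited frequencies and reactivity for 'attack' on each network.
print(word_table.loc['attack'])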
Example #3
import pandas as pd


def by_network_subj_obj_table(viomet_df,
                              date_range,
                              partition_infos,
                              subjects=['Barack Obama', 'Mitt Romney'],
                              objects=['Barack Obama', 'Mitt Romney']):
    '''
    Like the by-word table, but frequencies, reactivity, and total uses
    are broken down by the grammatical subject and object of the
    violent word, for each network.
    '''
    networks = ['MSNBC', 'CNN', 'Fox News']
    columns = ['fg', 'fe', 'reactivity', 'total uses']

    # Group rows by subject/object label first, then by network.
    subj_objs = ["Subject=" + subj for subj in subjects] \
        + ["Object=" + obj for obj in objects]
    index_tuples = [(so, net) for so in subj_objs for net in networks]

    index = pd.MultiIndex.from_tuples(index_tuples,
                                      names=['Subject/Object', 'Network'])

    df = pd.DataFrame(index=index, columns=columns, data=0.0)

    # isin selects matching rows for any number of subjects or objects;
    # a reduce over pairwise == comparisons only works for exactly two.
    subject_df = viomet_df[viomet_df.subjects.isin(subjects)]
    object_df = viomet_df[viomet_df.objects.isin(objects)]

    subject_counts_df = daily_metaphor_counts(
        subject_df,
        date_range,
        by=['network', 'subjects'],
    )
    object_counts_df = daily_metaphor_counts(object_df,
                                             date_range,
                                             by=['network', 'objects'])

    for idx, network_id in enumerate(['MSNBCW', 'CNNW', 'FOXNEWSW']):
        # Ground state data.
        sum_subj_g, n_subj_g = _get_ground(subject_counts_df, network_id,
                                           partition_infos)
        sum_obj_g, n_obj_g = _get_ground(object_counts_df, network_id,
                                         partition_infos)
        # Excited state data.
        sum_subj_e, n_subj_e = _get_excited(subject_counts_df, network_id,
                                            partition_infos)
        sum_obj_e, n_obj_e = _get_excited(object_counts_df, network_id,
                                          partition_infos)
        freq_subj_g = sum_subj_g / n_subj_g
        freq_obj_g = sum_obj_g / n_obj_g
        freq_subj_e = sum_subj_e / n_subj_e
        freq_obj_e = sum_obj_e / n_obj_e

        # Reactivity as relative change in frequency, matching the
        # other tables: (f_excited - f_ground) / f_ground.
        reactivity_subj = (freq_subj_e - freq_subj_g) / freq_subj_g
        reactivity_obj = (freq_obj_e - freq_obj_g) / freq_obj_g

        totals_subj = sum_subj_g + sum_subj_e
        totals_obj = sum_obj_g + sum_obj_e

        network = networks[idx]
        for subject in subjects:
            df.loc["Subject=" + subject, network] = [
                freq_subj_g[subject], freq_subj_e[subject],
                reactivity_subj[subject], totals_subj[subject]
            ]

        for object_ in objects:
            df.loc["Object=" + object_, network] = [
                freq_obj_g[object_], freq_obj_e[object_],
                reactivity_obj[object_], totals_obj[object_]
            ]

    fancy_columns = ['$f^{(1)}$', '$f^{(2)}$', 'reactivity', 'total uses']
    df.columns = fancy_columns

    return df
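
The default subjects and objects are the 2012 presidential candidates, but any names present in the viomet_df annotations can presumably be passed in. A sketch, again assuming the inputs exist:

so_table = by_network_subj_obj_table(viomet_df, date_range, partition_infos)
# Rows where Mitt Romney is the grammatical object of the violent word.
print(so_table.loc['Object=Mitt Romney'])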