def get_network_ts(df, network, year=2016):
    '''
    Daily metaphorical-violence (MV) use frequency series for one network.

    Computes the by-network daily frequency table over ``date_range(year)``
    from a copy of ``df`` and returns the ``network`` column with missing
    values dropped.
    '''
    frame = df.copy()

    # XXX daily_frequency by network works somewhat differently, but not
    # too sure how to describe it so...WATCH OUT!
    by_network = daily_frequency(frame, date_range(year), by=['network'])
    network_series = by_network[network]

    return network_series.dropna()
def test_daily_frequency():
    '''
    Check daily_frequency output, overall and split by network, against
    hand-computed expectations for the four-day test corpus.
    '''
    corpus_name = _setup_mongo()
    days = pd.date_range('2016-9-1', '2016-9-4', freq='D')
    corpus = IatvCorpus.objects(name=corpus_name)[0]

    program_names = [
        'Tracy Morgans news hour',
        'Dingbat Alley',
        'iCry Sad News Time',
        'Digging Turnips with Ethan Land',
        'Good morning, middle america!'
    ]
    networks = ['MSNBCW', 'CNNW', 'FOXNEWSW']
    violent_words = ['kill', 'murder', 'punch', 'attack']
    actors = ['trump', 'clinton', 'obama', 'media']

    input_df = _gen_test_input(program_names, networks, violent_words, actors)

    # --- Frequency over all networks -------------------------------------
    # Expected values obtained by dividing total metaphor counts by total
    # shows per day.
    expected_all = pd.DataFrame(
        index=days,
        data={'freq': [.75, 1.5, 2.0/3.0, 2.0/3.0]}
    )
    observed_all = daily_frequency(input_df, days, corpus)
    pd.testing.assert_frame_equal(observed_all, expected_all)

    # --- Frequency split by network --------------------------------------
    network_columns = ['MSNBCW', 'CNNW', 'FOXNEWSW']
    observed_by_network = daily_frequency(
        input_df, days, corpus, by=['network']
    )[network_columns]

    # One row per day, one column per network; NaN where no value is
    # expected for that network on that day.
    expected_rows = [
        (0, 2, 1),
        (2.5, np.nan, .5),
        (0, np.nan, 1),
        (np.nan, 0, 1)
    ]
    expected_by_network = pd.DataFrame(
        index=days,
        data=expected_rows,
        dtype=np.float64,
        columns=pd.Index(['MSNBCW', 'CNNW', 'FOXNEWSW'], name='network')
    )
    pd.testing.assert_frame_equal(observed_by_network, expected_by_network)
def get_obj_ts(df, obj, year=2016):
    '''
    Daily MV use frequency series for a single object of metaphorical
    violence, e.g., 'Hillary Clinton'.

    Rows are matched on the whitespace-stripped ``objects`` column; the
    caller's frame is left untouched. Missing values in the resulting
    series are replaced with 0.0.
    '''
    # Some entries carry stray whitespace, e.g., 'Donald Trump ', so
    # normalise on a fresh frame before matching.
    cleaned = df.assign(objects=df.objects.str.strip())
    matching_rows = cleaned[cleaned.objects == obj]

    freq_by_object = daily_frequency(
        matching_rows, date_range(year), by=['objects']
    )

    # Any NaN at this step is due to dividing by zero counts.
    return freq_by_object[obj].fillna(0.0)
# Supported election years mapped to their two candidates, each given as
# (tweet-series key / column stem, full name as annotated in the project data).
_CANDIDATES_BY_YEAR = {
    2016: (('trump', 'Donald Trump'), ('clinton', 'Hillary Clinton')),
    2012: (('romney', 'Mitt Romney'), ('obama', 'Barack Obama')),
}

# (column stem, network identifier) pairs, in output column order.
_NETWORK_COLUMNS = (
    ('msnbc', 'MSNBCW'),
    ('cnn', 'CNNW'),
    ('foxnews', 'FOXNEWSW'),
)


def data_for_model(year=2016, save_dir=None):
    '''
    Create a dataframe with all series needed to make regressions of
    faceted MV frequencies.

    Arguments:
        year (int): election year to build data for; 2016 or 2012.
        save_dir: accepted for interface compatibility; not used by this
            function.

    Returns:
        (pandas.DataFrame) with columns, in order: ``days_from_debate``,
        one tweet series per candidate (e.g. ``trump``, ``clinton``),
        ``metvi_all``, ``metvi_<candidate>_subj`` / ``metvi_<candidate>_obj``
        for each candidate, and ``metvi_msnbc``, ``metvi_cnn``,
        ``metvi_foxnews``.

    Raises:
        ValueError: if ``year`` is not one of the supported years.
    '''
    # Fail early and clearly; previously an unsupported year fell through
    # both branches and ended in an UnboundLocalError at the return.
    if year not in _CANDIDATES_BY_YEAR:
        raise ValueError(
            'Unsupported year {!r}; expected one of {}'.format(
                year, sorted(_CANDIDATES_BY_YEAR)
            )
        )
    candidates = _CANDIDATES_BY_YEAR[year]

    # Create metaphorical violence frequency series across all networks.
    csv = os.path.join('Data', 'viomet-sep-nov-{}.csv'.format(year))
    project_df = get_project_data_frame(csv)
    project_df = project_df[project_df.include]

    freq_df = daily_frequency(project_df, date_range(year))
    metvi_ts = pd.Series(
        index=freq_df.index, data=freq_df['freq'], dtype=float
    )

    # Insertion order below fixes the column order of the result.
    ts_data = dict(
        # Number of days before or after debate.
        days_from_debate=_days_from_debate(year, freq_df.index)
    )

    # Twitter timeseries, one per candidate.
    for key, _ in candidates:
        ts_data[key] = get_tweets_ts(key, year=year)

    # All metaphorical violence freq timeseries.
    ts_data['metvi_all'] = metvi_ts

    # Each candidate as subject or object metvi freq timeseries.
    for key, full_name in candidates:
        ts_data['metvi_{}_subj'.format(key)] = get_subj_ts(
            project_df, full_name, year=year
        )
        ts_data['metvi_{}_obj'.format(key)] = get_obj_ts(
            project_df, full_name, year=year
        )

    # Metvi freq on networks timeseries.
    for stem, network in _NETWORK_COLUMNS:
        ts_data['metvi_{}'.format(stem)] = get_network_ts(
            project_df, network, year=year
        )

    return pd.DataFrame(ts_data)
# Debate annotations per election year. Each entry is
# (date of the vertical tick, x position of its text label, label text).
# Label positions sit one day before the debate so right-aligned text ends
# just left of the tick.
_DEBATE_MARKERS = {
    2016: (
        (datetime(2016, 9, 26), '2016-9-25', "Debate #1"),
        (datetime(2016, 10, 9), '2016-10-8', "#2"),
        (datetime(2016, 10, 19), '2016-10-18', "#3"),
    ),
    2012: (
        (datetime(2012, 10, 3), '2012-10-2', "Debate #1"),
        (datetime(2012, 10, 16), '2012-10-15', "#2"),
        (datetime(2012, 10, 22), '2012-10-21', "#3"),
    ),
}


def _annotate_debates(ax, year):
    '''
    Mark the debates of `year` on `ax` and clip the x-axis to the season.

    Does nothing for years without an entry in _DEBATE_MARKERS.
    '''
    debates = _DEBATE_MARKERS.get(year)
    if debates is None:
        return

    yheight = 0.1
    zo = 10
    textargs = dict(size=13, ha='right',
                    bbox=dict(alpha=0.6, color='white'))

    for line_date, _, _ in debates:
        ax.axvline(line_date, ymax=yheight, color='k', zorder=zo)

    for _, text_x, label in debates:
        ax.text(text_x, 0.2, label, **textargs)

    ax.set_xlim(datetime(year, 8, 31), datetime(year, 11, 29))


def _plot_network_fits(project_df, date_range, partition_infos):
    '''
    Plot per-network daily frequencies with their fitted step models
    overlaid, and return the matplotlib axes.
    '''
    import matplotlib.lines as mlines

    # Networks in desired order, left to right, with matching display
    # names and line/marker styles.
    networks = ['MSNBCW', 'CNNW', 'FOXNEWSW']
    network_formatted = ['MSNBC', 'CNN', 'Fox News']
    line_styles = [':', '--', '-']
    markers = ['s', 'o', '^']

    network_freq = daily_frequency(project_df, date_range, by=['network'])

    ax = network_freq[networks].plot(
        style=markers, mew=1, mfc='white', ms=6, alpha=0.9, legend=False,
        figsize=DEFAULT_FIGSIZE, mec='lightgrey'
    )

    # Offsetting the level change by one minute draws the model as a
    # (near-)vertical step at each partition date.
    step_offset = timedelta(seconds=60)

    for network, label, line_style in zip(networks, network_formatted,
                                          line_styles):
        pinfo = partition_infos[network]
        fg = pinfo.f_ground
        fe = pinfo.f_excited

        # Ground level -> excited level between the two partition dates
        # -> back to ground level.
        dates = pd.DatetimeIndex([
            date_range[0],
            pinfo.partition_date_1 - step_offset,
            pinfo.partition_date_1,
            pinfo.partition_date_2,
            pinfo.partition_date_2 + step_offset,
            date_range[-1],
        ])
        levels = [fg, fg, fe, fe, fg, fg]

        pd.Series(index=dates, data=levels).plot(
            lw=3, ax=ax, color='k', ls=line_style, label=label
        )

    # NOTE: '%-d' (day of month without zero padding) is a platform
    # strftime extension and is not available everywhere (e.g. Windows).
    ax.xaxis.set_minor_formatter(pltdates.DateFormatter('%-d'))
    ax.xaxis.set_minor_locator(pltdates.DayLocator(bymonthday=(1, 15)))

    _annotate_debates(ax, date_range[0].year)

    ax.grid(False)
    ax.set_xlabel('Date')
    ax.set_ylabel('Frequency of usage')
    ax.set_title('Metaphorical violence usage data and dynamic model')
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    plt.tight_layout()
    ax.set_ylim(-.25, 7.25)

    # Manually create the legend so each entry shows both the line and
    # the marker style of its network.
    net_lines = [
        mlines.Line2D([], [], mfc='white', color='black', marker=marker,
                      markersize=7.5, ls=line_style, label=label,
                      mec='gray')
        for label, marker, line_style in zip(network_formatted, markers,
                                             line_styles)
    ]

    # 3.8 handlelength to only have whole dashes.
    plt.legend(handles=net_lines, handlelength=3.8)

    return ax


def by_network_frequency_figure(project_df,
                                date_range=pd.date_range('2016-09-01',
                                                         '2016-11-30',
                                                         freq='D'),
                                freq=True,
                                partition_infos=None,
                                font_scale=1.15,
                                save_path=None):
    '''
    Plot daily metaphorical violence usage for each network.

    Arguments:
        project_df (pandas.DataFrame): project data passed through to
            daily_frequency / daily_metaphor_counts.
        date_range (pandas.DatetimeIndex): days to plot; the year of its
            first entry selects which debates are annotated.
        freq (bool): plot frequencies if True, raw daily counts if False.
        partition_infos (dict): if given, maps each of 'MSNBCW', 'CNNW',
            'FOXNEWSW' to an object with partition_date_1,
            partition_date_2, f_ground and f_excited attributes; the
            fitted step model is drawn over the data. Fits are only drawn
            when freq is True; with freq False and partition_infos given
            nothing is plotted.
        font_scale (float): accepted for interface compatibility; not
            used by this function.
        save_path (str): if not None, the figure is saved there and then
            closed.

    Returns:
        None
    '''
    # Track the axes from whichever branch plots, so saving works for
    # every kind of figure rather than only the fitted one.
    ax = None

    if partition_infos is None:
        # Fits are not being shown for this condition.
        if freq:
            network_freq = daily_frequency(project_df, date_range,
                                           by=['network'])
            ax = network_freq.plot(style='o')
        else:
            full_df = daily_metaphor_counts(
                project_df, ['network'], date_range
            )[['MSNBCW', 'CNNW', 'FOXNEWSW']]
            ax = full_df.plot(style='o')

    # Show fits. TODO Include more arguments so that fits don't have to be
    # generated just to plot. Generate fits outside and pass fits in.
    elif freq:
        ax = _plot_network_fits(project_df, date_range, partition_infos)

    if save_path is not None and ax is not None:
        fig = ax.get_figure()
        fig.savefig(save_path)
        plt.close()