for i in range(1, l_in):
    print(sys.argv[i * 2 + 4])
    print(sys.argv[i * 2 + 4 + 1])
    dftfidf = pd.read_pickle(sys.argv[i * 2 + 4])
    dfbypartyspeaker = pd.read_pickle(sys.argv[i * 2 + 4 + 1])

    # dfbypartyspeaker=dfbypartyspeaker[dfbypartyspeaker['Speaker Party'].isin(parties4)]
    dftfidf_filt = dftfidf[dftfidf.Phrase.isin(pp1).apply(lambda x: not x)]
    #%%
    if (indiv == 1 and len(fixed_phrases) <= 8):
        print(fixed_phrases)
        term1_tf, term1topN_tf = m.compute_tf_idf_new(dftfidf_filt,
                                                      'Speaker Party', n)
    elif len(fixed_phrases) <= 8:
        # dfoverall = dftfidf_filt.groupby('Phrase').sum()
        term1_tf, term1topN_tf = m.compute_tf_idf_old(None, dftfidf_filt, n)
    else:
        print(fixed_phrases)
        print(len(fixed_phrases))
        term1topN_tf = pd.read_csv(fixed_phrases, index_col=0)
        term1topN_tf.Phrase = term1topN_tf.Phrase.map(ast.literal_eval)
    #%%
    if N == 4:
        term1topN_tf = term1topN_tf[term1topN_tf['Speaker Party'].isin(
            parties4)]
        dfbypartyspeaker = dfbypartyspeaker[
            dfbypartyspeaker['Speaker Party'].isin(parties4)]
    elif N == 2:
        term1topN_tf = term1topN_tf[term1topN_tf['Speaker Party'].isin(
            parties2)]
        dfbypartyspeaker = dfbypartyspeaker[
Example #2
0
# Party labels used for filtering; p1 is assigned outside this excerpt.
p2 = 'Schweizerische Volkspartei (SVP)'
p3 = 'FDP.Die Liberalen (FDP-Liberale)'
p4 = 'Christlichdemokratische Volkspartei der Schweiz (CVP)'
parties4 = [p1, p2, p3, p4]
parties2 = parties4[:2]  # the first two labels, as a separate list

# %% restrict the per-speaker frame to the two parties in parties2
# (dfbyparty itself is left unfiltered and feeds the aggregation below)
dfbypartyspeaker = dfbypartyspeaker.loc[
    dfbypartyspeaker['Speaker Party'].isin(parties2)
]

# %% aggregate dfbyparty per phrase with sum(); Phrase becomes a column again
dfoverall = dfbyparty.groupby('Phrase').sum().reset_index()

# %% tf-idf step; 500 is handed over as the third positional argument
(term1_tf, term1topN_tf) = m.compute_tf_idf_old(
    dfoverall, dfbyparty_filt, 500
)

# %% keep only the phrase rows that belong to the parties in parties2
term1topN_tf = term1topN_tf.loc[
    term1topN_tf['Speaker Party'].isin(parties2)
]

# %% select the phrases per ('Speaker Party', 'Speaker') combination
(term1_topN_bySpeakerParty, topN) = m.select_phrases_from_df2(
    dfbypartyspeaker,
    term1topN_tf,
    ['Speaker Party', 'Speaker'],
)

# %% two make_share variants: default arguments and scale=False
term1_topN_bySpeakerParty_scaled = m.make_share(term1_topN_bySpeakerParty)
term1_topN_bySpeakerParty_share = m.make_share(
    term1_topN_bySpeakerParty, scale=False
)

# %% save results: the raw table goes to argv[3], the scaled one to argv[4]
# (the scale=False table is not written out in this excerpt)
term1_topN_bySpeakerParty.to_csv(sys.argv[3])
term1_topN_bySpeakerParty_scaled.to_csv(sys.argv[4])