for i in range(1, l_in): print(sys.argv[i * 2 + 4]) print(sys.argv[i * 2 + 4 + 1]) dftfidf = pd.read_pickle(sys.argv[i * 2 + 4]) dfbypartyspeaker = pd.read_pickle(sys.argv[i * 2 + 4 + 1]) # dfbypartyspeaker=dfbypartyspeaker[dfbypartyspeaker['Speaker Party'].isin(parties4)] dftfidf_filt = dftfidf[dftfidf.Phrase.isin(pp1).apply(lambda x: not x)] #%% if (indiv == 1 and len(fixed_phrases) <= 8): print(fixed_phrases) term1_tf, term1topN_tf = m.compute_tf_idf_new(dftfidf_filt, 'Speaker Party', n) elif len(fixed_phrases) <= 8: # dfoverall = dftfidf_filt.groupby('Phrase').sum() term1_tf, term1topN_tf = m.compute_tf_idf_old(None, dftfidf_filt, n) else: print(fixed_phrases) print(len(fixed_phrases)) term1topN_tf = pd.read_csv(fixed_phrases, index_col=0) term1topN_tf.Phrase = term1topN_tf.Phrase.map(ast.literal_eval) #%% if N == 4: term1topN_tf = term1topN_tf[term1topN_tf['Speaker Party'].isin( parties4)] dfbypartyspeaker = dfbypartyspeaker[ dfbypartyspeaker['Speaker Party'].isin(parties4)] elif N == 2: term1topN_tf = term1topN_tf[term1topN_tf['Speaker Party'].isin( parties2)] dfbypartyspeaker = dfbypartyspeaker[
# Party labels matched against the 'Speaker Party' column below.
# p1 is not assigned in this part of the script; it must already exist.
p2 = 'Schweizerische Volkspartei (SVP)'
p3 = 'FDP.Die Liberalen (FDP-Liberale)'
p4 = 'Christlichdemokratische Volkspartei der Schweiz (CVP)'
parties2 = [p1, p2]
parties4 = [p1, p2, p3, p4]

# %% (disabled) party-level table restricted to the two-party subset
# dfbyparty=dfbyparty[dfbyparty['Speaker Party'].isin(parties2)]

# %% keep only the speaker-level rows whose party is in parties2
dfbypartyspeaker = dfbypartyspeaker.loc[
    dfbypartyspeaker['Speaker Party'].isin(parties2)
]

# %% sum the party-level table per phrase; 'Phrase' ends up a regular column
dfoverall = dfbyparty.groupby('Phrase').sum().reset_index()

# %% tf-idf scoring (the original cell title calls this "tfidf top 500")
# NOTE(review): dfoverall is aggregated from dfbyparty, while the second
# argument is dfbyparty_filt (defined outside this chunk) -- confirm that
# mixing the unfiltered and filtered tables is intended.
term1_tf, term1topN_tf = m.compute_tf_idf_old(
    dfoverall,
    dfbyparty_filt,
    500,
)

# %% restrict the selected phrases to the two-party subset as well
term1topN_tf = term1topN_tf.loc[
    term1topN_tf['Speaker Party'].isin(parties2)
]

# %% look up the selected phrases per (party, speaker)
term1_topN_bySpeakerParty, topN = m.select_phrases_from_df2(
    dfbypartyspeaker,
    term1topN_tf,
    ['Speaker Party', 'Speaker'],
)

# %% tfidf top 500: shares with make_share's default scaling and with
# scale=False
term1_topN_bySpeakerParty_scaled = m.make_share(term1_topN_bySpeakerParty)
term1_topN_bySpeakerParty_share = m.make_share(
    term1_topN_bySpeakerParty,
    scale=False,
)

# %% save results: output paths are the 3rd and 4th command-line arguments
term1_topN_bySpeakerParty.to_csv(sys.argv[3])
term1_topN_bySpeakerParty_scaled.to_csv(sys.argv[4])