def max_dim_A():
    """Return the largest number of candidate rows in any reference constituency.

    The candidate list is loaded with ``NA_list_preprocessed()`` and sliced once
    per reference constituency; the size of the biggest slice is returned
    (0 when every slice is empty).
    """
    candidates = NA_list_preprocessed()

    # (constituency id, name of the known winner) for the reference seats.
    reference_seats = (
        ("NA-53", "Imran Ahmed Khan Niazi"),
        ("NA-95", "IMRAN AHMED KHAN NIAZI"),
        ("NA-243", "Imran Ahmed Khan Niazi"),
        ("NA-35", "Imran Ahmad Khan Niazi"),
        ("NA-132", "Mian Muhammad Shehbaz Sharif"),
        ("NA-65", "Parvez Elahi"),
        ("NA-200", "Bilawal Bhutto Zerdari"),
        ("NA-59", "Chaudhary Nisar Ali Khan"),
        ("NA-78", "Ahsan iqbal chaudhary"),
        ("NA-213", "Asif Ali Zadari"),
        ("NA-124", "Muhammad Hamza Shehbaz Sharif"),
    )

    largest = 0
    for seat, winner in reference_seats:
        seat_rows = candidates[candidates["Constituency Number (ID)"] == seat]
        # One indicator entry per candidate row; only its length matters here.
        winner_flags = (seat_rows["Name of candidate"] == winner).tolist()
        largest = max(largest, len(winner_flags))
    return largest
def results_to_party(file_path):
    """Attach a party to every predicted winner and tabulate seats per party.

    Reads the seat-wise predictions from ``file_path``, looks up each
    "Predicted Winning Serial Number" in the candidate list returned by
    ``NA_list_preprocessed()`` and stores the matching "Party Affiliation" in a
    new "Party" column.  The enriched table and the per-party counts are
    written to CSV files under ``data/results/`` and the counts are printed.

    Returns:
        tuple: ``(party_result, party_distribution)`` -- the enriched
        predictions DataFrame and the ``value_counts`` Series of parties.

    Raises:
        IndexError: if a predicted serial number has no row in the candidate
            list.
    """
    predictions = pd.read_csv(file_path)
    candidates = NA_list_preprocessed()

    def party_for(serial):
        # First candidate row carrying this serial number decides the party.
        matching_rows = candidates[candidates["Serial Number"] == serial]
        return matching_rows["Party Affiliation"].tolist()[0]

    predictions["Party"] = [
        party_for(serial)
        for serial in predictions["Predicted Winning Serial Number"]
    ]
    predictions.to_csv("data/results/result_party.csv", index=False)

    seats_per_party = predictions["Party"].value_counts()
    seats_per_party.to_csv("data/results/parties_result.csv", index=True)
    print(seats_per_party)

    return predictions, seats_per_party
def _combined_candidate_scores(constituency_data, weights):
    """Return the weighted sum of every predictor's output for one constituency.

    ``weights`` holds twelve mixing weights, applied in this order:
    ``predict_dunya``, ``predict_gallup`` for each of the five survey files,
    ``predict_partyHistory``, ``predict_districtHistory`` for each of the four
    result files, and ``predict_twitter``.
    """
    sources = (
        (predict_dunya, {}),
        (predict_gallup, {"survey_name": "Gallup_2017_1.csv"}),
        (predict_gallup, {"survey_name": "Gallup_2017_2.csv"}),
        (predict_gallup, {"survey_name": "Gallup_2018_1.csv"}),
        (predict_gallup, {"survey_name": "Gallup_2018_2.csv"}),
        (predict_gallup, {"survey_name": "IPOR_2018_1.csv"}),
        (predict_partyHistory, {}),
        (predict_districtHistory, {"file_name": "results_1997.csv"}),
        (predict_districtHistory, {"file_name": "results_2002.csv"}),
        (predict_districtHistory, {"file_name": "results_2008.csv"}),
        (predict_districtHistory, {"file_name": "results_2013.csv"}),
        (predict_twitter, {}),
    )
    total = None
    for weight, (predictor, options) in zip(weights, sources):
        # Accumulate out of place in float so an integer-valued first term
        # cannot make a later float term fail on an in-place cast.
        term = weight * np.asarray(
            predictor(constituency_data, **options), dtype=float)
        total = term if total is None else total + term
    return total


def final_model(paras):
    """Predict the winner of every constituency and summarise seats per party.

    For each constituency in the candidate list the twelve predictor outputs
    are combined with the weights in ``paras`` and the candidate with the
    highest combined score is taken as the winner.  The seat-wise predictions
    are written to ``results/final_result.csv`` and then passed through
    ``results_to_party``.

    Args:
        paras: sequence of exactly twelve weights (see
            ``_combined_candidate_scores`` for their order).

    Returns:
        tuple: ``(party_wise_result, seat_wise_result)`` as produced by
        ``results_to_party`` (per-party counts, seat-wise table with parties).

    Raises:
        ValueError: if ``paras`` does not contain exactly twelve values.
    """
    weights = tuple(paras)
    if len(weights) != 12:
        raise ValueError(
            "final_model expects 12 weights, got %d" % len(weights))
    print("....")

    df_NA_list = NA_list_preprocessed()
    constituencies = np.asarray(
        df_NA_list["Constituency Number (ID)"].unique().tolist())

    # Optional hard-coded outcomes.  They are currently disabled (empty lists);
    # the previously used values are kept here for reference:
    # serial_number = ["924","1054","2171","1509","1359","1540","2029","1293","356","1729","2362","1619","1826","2362"]
    # rigged_constituencies = ["NA-213","NA-223","NA-108","NA-256","NA-247","NA-53","NA-95","NA-243","NA-35","NA-132","NA-65","NA-69","NA-78","NA-124"]
    # rigged_candidates = ["Asif Ali Zadari","Makhdoom Jamil uz Zaman","ABID SHER ALI","Muhammad Najeeb Haroo","Arf Ur Rehman Alvi","Imran Ahmed Khan Niazi","IMRAN AHMED KHAN NIAZI","Imran Ahmed Khan Niazi","Imran Ahmad Khan Niazi","Mian Muhammad Shehbaz Sharif","Parvez Elahi","Chaudhary Nisar Ali Khan","Ahsan iqbal chaudhary","Muhammad Hamza Shehbaz Sharif"]
    serial_number = []
    rigged_constituencies = []
    rigged_candidates = []
    df_rigged = pd.DataFrame({
        "constituency": rigged_constituencies,
        "candidate": rigged_candidates,
        "serial": serial_number
    })
    # First entry per constituency wins, as with the original row filter.
    overrides = {}
    for row in df_rigged.itertuples(index=False):
        overrides.setdefault(row.constituency, (row.serial, row.candidate))

    list_results = []
    for constituency in tqdm(constituencies):
        if constituency in overrides:
            winning_serial_number, winning_candidate_name = overrides[
                constituency]
            list_results.append(
                [constituency, winning_serial_number, winning_candidate_name])
            continue

        # slice data for one constituency
        current_constituency_data = df_NA_list[
            df_NA_list["Constituency Number (ID)"] == constituency]
        candidate_prob = _combined_candidate_scores(
            current_constituency_data, weights)

        # NOTE(review): np.argsort places NaN last, so a candidate whose score
        # is NaN would be selected here -- confirm the predictors never
        # return NaN for these inputs.
        best_position = np.argsort(candidate_prob)[-1]

        # Read name and serial from the same row position; looking the serial
        # up again by name picks the wrong row when two candidates in one
        # constituency share a name.
        winning_candidate_name = current_constituency_data[
            "Name of candidate"].tolist()[best_position]
        winning_serial_number = current_constituency_data[
            "Serial Number"].tolist()[best_position]
        list_results.append(
            [constituency, winning_serial_number, winning_candidate_name])

    df_results = pd.DataFrame(list_results,
                              columns=[
                                  'Constituency',
                                  'Predicted Winning Serial Number',
                                  'Predicted Winning Name of Candidate'
                              ])

    # save as results to csv
    df_results.to_csv("results/final_result.csv", index=False)

    # Saves result in terms of party representation
    seat_wise_result, party_wise_result = results_to_party(
        "results/final_result.csv")
    return party_wise_result, seat_wise_result
# -*- coding: utf-8 -*- """ @description: Compares predicted results with the original results @author: Awais """ import pandas as pd import json import os import numpy as np import time from preprocessing import Vote_distribution_preprocessed, Result_2018, NA_list_preprocessed from la import l2_Exact, l1_LP from model import final_model df_NA_list_2018 = NA_list_preprocessed() #============================================================================== # processes on real results taken from Pakistan's Election Commission #============================================================================== def real_results(): df_NA_list = Result_2018() constituencies = df_NA_list.seat.unique().tolist() # Results Data Frame list_results = [] for constituency in constituencies: print([constituency]) # slice data for one constituency is_relevant_constituency = df_NA_list["seat"] == constituency current_constituency_data = df_NA_list[is_relevant_constituency]
def matrix_A():
    """Build the feature matrix ``A`` and winner-indicator vector ``Y``.

    Each of the eleven reference constituencies owns one block of
    ``max_dim_A()`` consecutive columns of ``A`` (and the same rows of ``Y``).
    Inside a block, column ``j`` holds the twelve predictor outputs for the
    ``j``-th candidate row of that constituency and ``Y`` holds 1 for the
    known winner and 0 otherwise; unused slots of a block stay zero.

    Fixes relative to the previous version:
      * ``Y`` was written at rows ``k:k + 34`` (advancing one row per
        constituency), so consecutive blocks overwrote each other;
      * ``A`` was always written at column 0, so only the last constituency
        survived;
      * the block size was hard-coded as 34 / 374 for ``Y`` while ``A`` was
        sized with ``max_dim_A()``; both now use ``max_dim_A()``.
    NOTE(review): the block layout is inferred from the original shapes
    (374 == 11 * 34) -- confirm against the solver that consumes A and Y.

    Returns:
        tuple: ``(A, Y)`` with shapes ``(12, 11 * max_dim_A())`` and
        ``(11 * max_dim_A(), 1)``; NaN entries of ``A`` are replaced by 0.
    """
    df_NA_list = NA_list_preprocessed()

    # (constituency id, name of the known winner) for the reference seats.
    reference_seats = (
        ("NA-53", "Imran Ahmed Khan Niazi"),
        ("NA-95", "IMRAN AHMED KHAN NIAZI"),
        ("NA-243", "Imran Ahmed Khan Niazi"),
        ("NA-35", "Imran Ahmad Khan Niazi"),
        ("NA-132", "Mian Muhammad Shehbaz Sharif"),
        ("NA-65", "Parvez Elahi"),
        ("NA-200", "Bilawal Bhutto Zerdari"),
        ("NA-59", "Chaudhary Nisar Ali Khan"),
        ("NA-78", "Ahsan iqbal chaudhary"),
        ("NA-213", "Asif Ali Zadari"),
        ("NA-124", "Muhammad Hamza Shehbaz Sharif"),
    )

    # One row of A per predictor, in this order (all weights are 1 here).
    sources = (
        (predict_dunya, {}),
        (predict_gallup, {"survey_name": "Gallup_2017_1.csv"}),
        (predict_gallup, {"survey_name": "Gallup_2017_2.csv"}),
        (predict_gallup, {"survey_name": "Gallup_2018_1.csv"}),
        (predict_gallup, {"survey_name": "Gallup_2018_2.csv"}),
        (predict_gallup, {"survey_name": "IPOR_2018_1.csv"}),
        (predict_partyHistory, {}),
        (predict_districtHistory, {"file_name": "results_1997.csv"}),
        (predict_districtHistory, {"file_name": "results_2002.csv"}),
        (predict_districtHistory, {"file_name": "results_2008.csv"}),
        (predict_districtHistory, {"file_name": "results_2013.csv"}),
        (predict_twitter, {}),
    )

    block_width = max_dim_A()
    total_columns = block_width * len(reference_seats)
    A = np.zeros([len(sources), total_columns])
    Y = np.zeros([total_columns, 1])

    for k, (constituency, winner_name) in enumerate(reference_seats):
        # slice data for one constituency
        current_constituency_data = df_NA_list[
            df_NA_list["Constituency Number (ID)"] == constituency]
        y_output = np.array(
            (current_constituency_data["Name of candidate"] == winner_name
             ).tolist()).astype(int)
        n_candidates = y_output.shape[0]

        # This constituency's block starts at k * block_width.
        start = k * block_width
        stop = start + n_candidates
        Y[start:stop, 0] = y_output
        for row, (predictor, options) in enumerate(sources):
            A[row, start:stop] = np.array(
                predictor(current_constituency_data, **options))

    # Replace NaN feature values by 0 (nan_to_num's second positional
    # argument is ``copy``, not the fill value, so it is not passed here).
    A = np.nan_to_num(A)
    return A, Y
def hyper_parameter_optimization_l2(para0, para1, para2, para3, para4, para5,
                                    para6, para7, para8, para9, para10, para11,
                                    para12):
    """Score a set of mixing weights by negative squared error on known seats.

    For every reference constituency the thirteen predictor outputs are
    combined as ``sum(para_i * prediction_i)`` and compared with the 0/1
    indicator of the known winner.  The squared differences are summed over
    all candidates and constituencies.

    Returns:
        The negated total squared error (larger is better).
    """
    candidates = NA_list_preprocessed()

    # (constituency id, name of the known winner) for the reference seats.
    reference_seats = (
        ("NA-53", "Imran Ahmed Khan Niazi"),
        ("NA-95", "IMRAN AHMED KHAN NIAZI"),
        ("NA-243", "Imran Ahmed Khan Niazi"),
        ("NA-35", "Imran Ahmad Khan Niazi"),
        ("NA-132", "Mian Muhammad Shehbaz Sharif"),
        ("NA-65", "Parvez Elahi"),
        ("NA-200", "Bilawal Bhutto Zerdari"),
        ("NA-59", "Chaudhary Nisar Ali Khan"),
        ("NA-78", "Ahsan iqbal chaudhary"),
        ("NA-213", "Asif Ali Zadari"),
        ("NA-124", "Muhammad Hamza Shehbaz Sharif"),
    )

    # Terms added after the para0 term, in evaluation order.
    later_terms = (
        (para1, predict_gallup, {"survey_name": "Gallup_2017_1.csv"}),
        (para2, predict_gallup, {"survey_name": "Gallup_2017_2.csv"}),
        (para3, predict_gallup, {"survey_name": "Gallup_2018_1.csv"}),
        (para4, predict_gallup, {"survey_name": "Gallup_2018_2.csv"}),
        (para5, predict_gallup, {"survey_name": "IPOR_2018_1.csv"}),
        (para6, predict_partyHistory, {}),
        (para7, predict_districtHistory, {"file_name": "results_1997.csv"}),
        (para8, predict_districtHistory, {"file_name": "results_2002.csv"}),
        (para9, predict_districtHistory, {"file_name": "results_2008.csv"}),
        (para10, predict_districtHistory, {"file_name": "results_2013.csv"}),
        (para11, predict_twitter, {}),
        (para12, predict_random, {}),
    )

    squared_error = 0
    for seat, winner in reference_seats:
        seat_rows = candidates[candidates["Constituency Number (ID)"] == seat]
        target = np.array(
            (seat_rows["Name of candidate"] == winner).tolist()).astype(int)

        scores = para0 * np.array(predict_dunya(seat_rows))
        for weight, predictor, options in later_terms:
            scores += weight * np.array(predictor(seat_rows, **options))

        squared_error += np.sum(np.power((scores - target), 2))
    return -1 * squared_error