Example #1
0
def max_dim_A(df_NA_list=None):
    """Return the largest candidate count among the reference constituencies.

    For each of the hard-coded reference constituencies, the number of rows
    in the candidate table whose "Constituency Number (ID)" matches is
    counted, and the maximum of those counts is returned.

    The previous implementation returned from inside the loop, so it only
    ever reported the size of the first constituency ("NA-53").

    Args:
        df_NA_list: Optional candidate table with a
            "Constituency Number (ID)" column. When omitted it is loaded
            with ``NA_list_preprocessed()``, as before.

    Returns:
        int: The maximum number of rows found for any reference
        constituency, or 0 when none of them has any rows.
    """
    if df_NA_list is None:
        df_NA_list = NA_list_preprocessed()

    constituencies = [
        "NA-53", "NA-95", "NA-243", "NA-35", "NA-132", "NA-65", "NA-200",
        "NA-59", "NA-78", "NA-213", "NA-124"
    ]
    constituency_ids = df_NA_list["Constituency Number (ID)"]

    largest = 0
    for constituency in constituencies:
        # The size of the per-constituency label vector equals the number of
        # matching rows; the winner's name does not influence it.
        n_candidates = int((constituency_ids == constituency).sum())
        largest = max(largest, n_candidates)

    # Return only after every constituency has been inspected.
    return largest
Example #2
0
def results_to_party(file_path):
    """Attach party affiliations to predicted winners and tally them.

    Reads the prediction CSV at ``file_path``, looks up each
    "Predicted Winning Serial Number" in the table returned by
    ``NA_list_preprocessed()`` and stores the matching "Party Affiliation"
    in a new "Party" column. The augmented table is written to
    "data/results/result_party.csv" and the per-party counts to
    "data/results/parties_result.csv"; the counts are also printed.

    Args:
        file_path: Path of the CSV holding the predicted winners.

    Returns:
        tuple: ``(predictions_with_party, party_counts)``.

    Raises:
        IndexError: If a predicted serial number has no row in the
            candidate table.
    """
    predictions = pd.read_csv(file_path)
    candidates = NA_list_preprocessed()

    def party_of(serial_number):
        # First matching row wins, mirroring a plain "take element 0" lookup.
        matches = candidates[candidates["Serial Number"] == serial_number]
        return matches["Party Affiliation"].tolist()[0]

    predictions["Party"] = [
        party_of(serial)
        for serial in predictions["Predicted Winning Serial Number"]
    ]
    predictions.to_csv("data/results/result_party.csv", index=False)

    seat_counts = predictions["Party"].value_counts()
    seat_counts.to_csv("data/results/parties_result.csv", index=True)
    print(seat_counts)

    return predictions, seat_counts
Example #3
0
def final_model(paras):
    """Predict the winner of every constituency from a weighted score sum.

    For each constituency in the table returned by
    ``NA_list_preprocessed()`` the twelve feature predictors are evaluated
    on that constituency's rows, multiplied by the corresponding weight in
    ``paras`` and summed. The row with the highest total is the predicted
    winner. Constituencies listed in ``rigged_constituencies`` (currently
    empty) bypass the prediction and use the pinned candidate instead.

    The per-seat predictions are written to "results/final_result.csv" and
    then handed to ``results_to_party`` for the party-level summary.

    Weight order in ``paras``:
        0      predict_dunya
        1-5    predict_gallup for Gallup_2017_1, Gallup_2017_2,
               Gallup_2018_1, Gallup_2018_2, IPOR_2018_1
        6      predict_partyHistory
        7-10   predict_districtHistory for results_1997, results_2002,
               results_2008, results_2013
        11     predict_twitter

    Args:
        paras: Sequence of exactly twelve weights, ordered as above.

    Returns:
        tuple: ``(party_wise_result, seat_wise_result)`` as produced by
        ``results_to_party``.

    Raises:
        ValueError: If ``paras`` does not contain exactly twelve weights.
    """
    weights = list(paras)
    if len(weights) != 12:
        raise ValueError(
            "final_model expects exactly 12 weights, got %d" % len(weights))
    print("....")

    df_NA_list = NA_list_preprocessed()
    constituency_ids = df_NA_list["Constituency Number (ID)"]
    constituencies = np.asarray(constituency_ids.unique().tolist())

    # Predetermined outcomes. The lists are intentionally empty; the values
    # that were pinned previously are kept in the comments for reference.
    # ["924","1054","2171","1509","1359","1540","2029","1293","356","1729","2362","1619","1826","2362"]
    serial_number = []
    # ["NA-213","NA-223","NA-108","NA-256","NA-247","NA-53","NA-95","NA-243","NA-35","NA-132","NA-65","NA-69","NA-78","NA-124"]
    rigged_constituencies = []
    # ["Asif Ali Zadari","Makhdoom Jamil uz Zaman","ABID SHER ALI","Muhammad Najeeb Haroo","Arf Ur Rehman Alvi","Imran Ahmed Khan Niazi","IMRAN AHMED KHAN NIAZI","Imran Ahmed Khan Niazi","Imran Ahmad Khan Niazi","Mian Muhammad Shehbaz Sharif","Parvez Elahi","Chaudhary Nisar Ali Khan","Ahsan iqbal chaudhary","Muhammad Hamza Shehbaz Sharif"]
    rigged_candidates = []
    df_rigged = pd.DataFrame({
        "constituency": rigged_constituencies,
        "candidate": rigged_candidates,
        "serial": serial_number
    })

    survey_files = [
        "Gallup_2017_1.csv", "Gallup_2017_2.csv", "Gallup_2018_1.csv",
        "Gallup_2018_2.csv", "IPOR_2018_1.csv"
    ]
    history_files = [
        "results_1997.csv", "results_2002.csv", "results_2008.csv",
        "results_2013.csv"
    ]

    list_results = []
    for constituency in tqdm(constituencies):
        # slice data for one constituency
        current_constituency_data = df_NA_list[constituency_ids ==
                                               constituency]

        if constituency in rigged_constituencies:
            pinned = df_rigged[df_rigged["constituency"] == constituency]
            winning_candidate_name = pinned["candidate"].tolist()[0]
            winning_serial_number = pinned["serial"].tolist()[0]
        else:
            # Accumulate the weighted predictor outputs in a fixed order so
            # the floating-point sum is reproducible.
            candidate_prob = weights[0] * np.array(
                predict_dunya(current_constituency_data))
            for weight, survey in zip(weights[1:6], survey_files):
                candidate_prob += weight * np.array(
                    predict_gallup(current_constituency_data,
                                   survey_name=survey))
            candidate_prob += weights[6] * np.array(
                predict_partyHistory(current_constituency_data))
            for weight, history in zip(weights[7:11], history_files):
                candidate_prob += weight * np.array(
                    predict_districtHistory(current_constituency_data,
                                            file_name=history))
            candidate_prob += weights[11] * np.array(
                predict_twitter(current_constituency_data))

            # Position of the highest score within this constituency's rows.
            winner_position = np.argsort(candidate_prob)[-1]
            winning_candidate_name = current_constituency_data[
                "Name of candidate"].tolist()[winner_position]
            # Read the serial number from the same row instead of searching
            # by name: two candidates sharing a name would otherwise always
            # resolve to the first of them.
            winning_serial_number = current_constituency_data[
                "Serial Number"].tolist()[winner_position]

        list_results.append(
            [constituency, winning_serial_number, winning_candidate_name])

    df_results = pd.DataFrame(list_results,
                              columns=[
                                  'Constituency',
                                  'Predicted Winning Serial Number',
                                  'Predicted Winning Name of Candidate'
                              ])

    # save as results to csv
    df_results.to_csv("results/final_result.csv", index=False)
    # Saves result in terms of party representation
    seat_wise_result, party_wise_result = results_to_party(
        "results/final_result.csv")

    return party_wise_result, seat_wise_result
Example #4
0
# -*- coding: utf-8 -*-
"""
@description: Compares predicted results with the original results
@author: Awais
"""
import pandas as pd
import json
import os
import numpy as np
import time
from preprocessing import Vote_distribution_preprocessed, Result_2018, NA_list_preprocessed
from la import l2_Exact, l1_LP
from model import final_model

# Result of NA_list_preprocessed(), evaluated once when this module is
# imported and kept at module level.
df_NA_list_2018 = NA_list_preprocessed()


#==============================================================================
#  processes on real results taken from Pakistan's Election Commission
#==============================================================================
def real_results():
    df_NA_list = Result_2018()
    constituencies = df_NA_list.seat.unique().tolist()
    # Results Data Frame
    list_results = []
    for constituency in constituencies:
        print([constituency])
        # slice data for one constituency
        is_relevant_constituency = df_NA_list["seat"] == constituency
        current_constituency_data = df_NA_list[is_relevant_constituency]
Example #5
0
def matrix_A():
    """Build the feature matrix ``A`` and label vector ``Y`` for fitting.

    Each of the eleven reference constituencies gets its own block of
    ``block_width`` consecutive columns in ``A`` (and rows in ``Y``), where
    ``block_width`` is the largest number of candidate rows found in any of
    those constituencies. Blocks shorter than ``block_width`` are
    zero-padded. With eleven constituencies and at most 34 candidates this
    gives the 374 entries the labels were sized for.

    Row ``i`` of ``A`` holds the output of predictor ``i``:
        0      predict_dunya
        1-5    predict_gallup for Gallup_2017_1, Gallup_2017_2,
               Gallup_2018_1, Gallup_2018_2, IPOR_2018_1
        6      predict_partyHistory
        7-10   predict_districtHistory for results_1997, results_2002,
               results_2008, results_2013
        11     predict_twitter

    ``Y`` is 1 at the row of the known winner and 0 elsewhere.

    Earlier behaviour that is corrected here: every constituency was
    written to the same leading columns of ``A`` (each iteration overwrote
    the previous one), and ``Y`` blocks were placed at offset ``k`` instead
    of ``k * block_width`` so they overlapped.

    Returns:
        tuple: ``(A, Y)`` with ``A`` of shape ``(12, block_width * 11)`` and
        ``Y`` of shape ``(block_width * 11, 1)``. NaN entries in ``A`` are
        replaced by 0.
    """
    df_NA_list = NA_list_preprocessed()
    winner_names = [
        "Imran Ahmed Khan Niazi", "IMRAN AHMED KHAN NIAZI",
        "Imran Ahmed Khan Niazi", "Imran Ahmad Khan Niazi",
        "Mian Muhammad Shehbaz Sharif", "Parvez Elahi",
        "Bilawal Bhutto Zerdari", "Chaudhary Nisar Ali Khan",
        "Ahsan iqbal chaudhary", "Asif Ali Zadari",
        "Muhammad Hamza Shehbaz Sharif"
    ]
    constituencies = [
        "NA-53", "NA-95", "NA-243", "NA-35", "NA-132", "NA-65", "NA-200",
        "NA-59", "NA-78", "NA-213", "NA-124"
    ]
    survey_files = [
        "Gallup_2017_1.csv", "Gallup_2017_2.csv", "Gallup_2018_1.csv",
        "Gallup_2018_2.csv", "IPOR_2018_1.csv"
    ]
    history_files = [
        "results_1997.csv", "results_2002.csv", "results_2008.csv",
        "results_2013.csv"
    ]
    n_features = 12

    # Slice once per constituency; the slices drive both sizing and filling.
    constituency_ids = df_NA_list["Constituency Number (ID)"]
    slices = [
        df_NA_list[constituency_ids == constituency]
        for constituency in constituencies
    ]
    # Computed locally so the layout of A and Y is always self-consistent.
    block_width = max(len(rows) for rows in slices)
    total = block_width * len(slices)

    A = np.zeros([n_features, total])
    Y = np.zeros([total, 1])
    for k, (current_constituency_data,
            winner_name) in enumerate(zip(slices, winner_names)):
        n_candidates = len(current_constituency_data)
        start = k * block_width
        stop = start + n_candidates

        is_winner = (current_constituency_data["Name of candidate"] ==
                     winner_name).tolist()
        Y[start:stop, 0] = np.array(is_winner).astype(int)

        # Predictors are evaluated in row order; each is assumed to return
        # one value per candidate row -- TODO confirm against the helpers.
        features = [predict_dunya(current_constituency_data)]
        features += [
            predict_gallup(current_constituency_data, survey_name=survey)
            for survey in survey_files
        ]
        features.append(predict_partyHistory(current_constituency_data))
        features += [
            predict_districtHistory(current_constituency_data,
                                    file_name=history)
            for history in history_files
        ]
        features.append(predict_twitter(current_constituency_data))

        for row, prediction in enumerate(features):
            A[row, start:stop] = np.array(prediction)

    # Clean NaNs once, after the whole matrix has been filled.
    A = np.nan_to_num(A)
    return A, Y
Example #6
0
File: ml.py Project: ebold/GE2018
def hyper_parameter_optimization_l2(para0, para1, para2, para3, para4, para5,
                                    para6, para7, para8, para9, para10, para11,
                                    para12):
    """Negative squared error of the weighted model on reference seats.

    For each of the eleven reference constituencies the thirteen predictor
    outputs are multiplied by their weights and summed, and the squared
    difference to the 0/1 winner indicator is accumulated. The negated
    total is returned, so a larger value means a better fit.

    The previous implementation returned from inside the loop and therefore
    only scored the first constituency ("NA-53").

    Args:
        para0: Weight of ``predict_dunya``.
        para1, para2, para3, para4, para5: Weights of ``predict_gallup`` for
            Gallup_2017_1, Gallup_2017_2, Gallup_2018_1, Gallup_2018_2 and
            IPOR_2018_1 respectively.
        para6: Weight of ``predict_partyHistory``.
        para7, para8, para9, para10: Weights of ``predict_districtHistory``
            for results_1997, results_2002, results_2008 and results_2013.
        para11: Weight of ``predict_twitter``.
        para12: Weight of ``predict_random``.

    Returns:
        The negated sum of squared errors over all reference constituencies.
    """
    df_NA_list = NA_list_preprocessed()
    winner_names = [
        "Imran Ahmed Khan Niazi", "IMRAN AHMED KHAN NIAZI",
        "Imran Ahmed Khan Niazi", "Imran Ahmad Khan Niazi",
        "Mian Muhammad Shehbaz Sharif", "Parvez Elahi",
        "Bilawal Bhutto Zerdari", "Chaudhary Nisar Ali Khan",
        "Ahsan iqbal chaudhary", "Asif Ali Zadari",
        "Muhammad Hamza Shehbaz Sharif"
    ]
    constituencies = [
        "NA-53", "NA-95", "NA-243", "NA-35", "NA-132", "NA-65", "NA-200",
        "NA-59", "NA-78", "NA-213", "NA-124"
    ]
    survey_files = [
        "Gallup_2017_1.csv", "Gallup_2017_2.csv", "Gallup_2018_1.csv",
        "Gallup_2018_2.csv", "IPOR_2018_1.csv"
    ]
    history_files = [
        "results_1997.csv", "results_2002.csv", "results_2008.csv",
        "results_2013.csv"
    ]
    survey_weights = (para1, para2, para3, para4, para5)
    history_weights = (para7, para8, para9, para10)
    constituency_ids = df_NA_list["Constituency Number (ID)"]

    diff = 0
    for constituency, winner_name in zip(constituencies, winner_names):
        # slice data for one constituency
        current_constituency_data = df_NA_list[constituency_ids ==
                                               constituency]
        y_output = np.array(
            (current_constituency_data["Name of candidate"] ==
             winner_name).tolist()).astype(int)

        # Weighted sum of the predictors, accumulated in a fixed order.
        candidate_prob = para0 * np.array(
            predict_dunya(current_constituency_data))
        for weight, survey in zip(survey_weights, survey_files):
            candidate_prob += weight * np.array(
                predict_gallup(current_constituency_data,
                               survey_name=survey))
        candidate_prob += para6 * np.array(
            predict_partyHistory(current_constituency_data))
        for weight, history in zip(history_weights, history_files):
            candidate_prob += weight * np.array(
                predict_districtHistory(current_constituency_data,
                                        file_name=history))
        candidate_prob += para11 * np.array(
            predict_twitter(current_constituency_data))
        candidate_prob += para12 * np.array(
            predict_random(current_constituency_data))

        diff += np.sum(np.power((candidate_prob - y_output), 2))

    # Negate only after every constituency has contributed to the error.
    return -1 * diff