예제 #1
0
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from src.config import config
import sys
import matplotlib.pyplot as plt

def z_score(df):
    """Standardise *df* by subtracting its mean and dividing by its std.

    For a ``pandas.DataFrame`` each column is standardised independently,
    using the population standard deviation (``ddof=0``, which is what
    ``np.std`` requests).  The axis is spelled out explicitly because an
    axis-less ``np.mean(df)`` forwards ``axis=None`` to pandas, and newer
    pandas releases reduce that over the whole frame to a single scalar
    instead of one value per column.

    Any other input (ndarray, Series, list-like accepted by numpy) goes
    through ``np.mean`` / ``np.std`` with their default arguments.

    A constant column / constant input has a standard deviation of zero, so
    the division yields NaN or inf; no special handling is done for that.
    """
    if isinstance(df, pd.DataFrame):
        col_mean = df.mean(axis=0)
        col_std = df.std(axis=0, ddof=0)
        return (df - col_mean) / col_std
    return (df - np.mean(df)) / np.std(df)

if __name__ == '__main__':
    # Project configuration lookups.  adm/cdr/dhs are fetched here but not
    # used in the visible part of this script.
    PATH = config.get_dir()
    country = config.get_country()
    adm = config.get_headers(country, 'adm')
    cdr = config.get_headers(country, 'cdr')
    dhs = config.get_headers(country, 'dhs')

    # read_csv already returns a DataFrame, so no extra pd.DataFrame() wrapper
    # is needed.  Rows with a missing value in any selected column are dropped.
    data = pd.read_csv(PATH+'/final/%s/master_2.0.csv'%country,
                       usecols=['Pagerank', 'G_residuals',
                                'EigenvectorCentrality',
                                'BloodPosRateSL', 'Log_pop_density',
                                'BloodPosRate']).dropna()

    # Keep only rows with a strictly positive BloodPosRate.
    data = data[data['BloodPosRate'] > 0]
    # `.ix` was removed in pandas 1.0.  On the integer index produced by
    # read_csv it sliced by *label*, so `.loc[1:]` is the equivalent: it keeps
    # rows whose index label is >= 1.
    # NOTE(review): if the intent was "drop the first remaining row", this
    # should be `.iloc[1:]` instead -- confirm.
    data = data.loc[1:]
    # `.as_matrix()` was removed in pandas 1.0; `.values` gives the ndarray.
    z_data = z_score(data).values

    mse_all = []
    for i in range(5):
        mse_1, mse_2, mse_3, mse_4, mse_5 = [], [], [], [], []
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Training set %d' % i)
        # Half the row count (floored, returned as a float) plus 7 per
        # iteration.  Presumably a split size -- TODO confirm against the
        # code that consumes `prop`.
        prop = np.floor(len(data) / 2) + (i * 7)
예제 #2
0
def get_country_data(country):
    """Read the ``master_SL.csv`` table for *country* and return a DataFrame.

    The CSV is located at ``<config.get_dir()>/final/<country>/master_SL.csv``.
    """
    base_dir = config.get_dir()
    csv_path = base_dir + '/final/%s/master_SL.csv' % country
    table = pd.read_csv(csv_path)
    return pd.DataFrame(table)