예제 #1
0
def get_data_bar(url, ylabel, min_index, max_index):
    """Scrape a region/value table pair from ``url`` and show it as a bar chart.

    The page is fetched with ``my_function.getData``. Region names come from
    the table with id ``tableLeftBottom`` (cells with id ``th4``) and values
    from the table with id ``tableRightBottom`` (cells with class ``datas``);
    the two tables are matched row by row, by position. The last row of the
    region table is never read. For each kept row the last ``datas`` cell is
    taken, every ``.`` is removed from its text, and the result is parsed as
    an ``int``.

    Args:
        url: Address of the page to scrape.
        ylabel: Label for the y axis.
        min_index: Margin below the smallest value for the lower y limit.
            The special value ``0`` pins the lower limit at ``0`` instead.
        max_index: Margin added above the largest value for the upper y limit.

    Returns:
        None. The chart is displayed with ``plt.show()``.

    Note:
        ``pd`` and ``plt`` are expected to be imported at module level.
    """
    # import libraries
    import my_function as func

    soup = func.getData(url)

    # Locate the two side-by-side tables: names on the left, values on the right.
    result_wilayah = soup.find('table', attrs={'id': 'tableLeftBottom'})
    result_value = soup.find('table', attrs={'id': 'tableRightBottom'})

    rows_wilayah = result_wilayah.find_all('tr')
    rows_value = result_value.find_all('tr')

    list_wilayah = []
    value = []

    for row_index, row in enumerate(rows_wilayah[:-1]):
        data_wilayah = row.find_all('td', attrs={'id': 'th4'})

        # Rows without a name cell carry no data point; skip them before
        # touching the value table at all.
        if not data_wilayah:
            continue

        data_value = rows_value[row_index].find_all('td', attrs={'class': 'datas'})

        wilayah = data_wilayah[0].getText()
        # Only the last value column is plotted. Dots are stripped so the
        # text parses as an integer.
        nilai = int(data_value[-1].getText().replace('.', ''))

        list_wilayah.append(wilayah)
        value.append(nilai)

    df = pd.DataFrame({'wilayah': list_wilayah, 'value': value})

    # Plot the dataframe as a bar chart
    plt.bar(df['wilayah'], df['value'])
    plt.xlabel('Provinsi')
    plt.ylabel(ylabel)

    # A min_index of 0 means "start the axis at zero"; any other value is a
    # margin below the smallest bar.
    lower = 0 if min_index == 0 else df['value'].min() - min_index
    plt.gca().set_ylim([lower, df['value'].max() + max_index])

    # rotation must be numeric: a string such as '50' is not one of the
    # accepted keywords ('vertical' / 'horizontal').
    plt.xticks(rotation=50, ha='right')
    plt.show()
예제 #2
0
# import libraries
import numpy as np
import pandas as pd
import my_function as func
import matplotlib.pyplot as plt
# Page to scrape; fetched and parsed by the project helper func.getData
# (presumably returns a BeautifulSoup document -- confirm in my_function).
url = 'https://www.bps.go.id/dynamictable/2016/06/16/1211/indeks-pembangunan-manusia-menurut-provinsi-2010-2018-metode-baru-.html'
soup = func.getData(url)

# The page keeps region names and values in two separate tables:
# 'tableLeftBottom' is read for names, 'tableRightBottom' for values.
result_wilayah = soup.find('table', attrs={'id': 'tableLeftBottom'})
result_value = soup.find('table', attrs={'id': 'tableRightBottom'})

rows_wilayah = result_wilayah.find_all('tr')
rows_value = result_value.find_all('tr')

# Accumulators for the scraped region names and their values.
list_wilayah = []
value = []

# Walk the name rows (the last one is excluded) and pair each with the value
# row at the same position.
# NOTE(review): `id` shadows the builtin of the same name.
for id, r in enumerate(rows_wilayah[:-1]):
    # Name cells carry id 'th4'; value cells carry class 'datas'.
    data_wilayah = r.find_all('td', attrs={'id': 'th4'})
    data_value = rows_value[id].find_all('td', attrs={'class': 'datas'})

    # Skip rows that have no name cell.
    if len(data_wilayah) == 0:
        continue

    # The region name is the text of the <b> element inside the first name cell.
    wilayah = data_wilayah[0].find('b').getText()
예제 #3
0
def get_regression_pemilu(url2, ylabel):
    """Regress a scraped indicator on each candidate's vote count and plot it.

    Two pages are fetched with ``my_function.getData``:

    * ``https://kawalpemilu.org/#pilpres:0`` -- for every ``tr.row`` of the
      ``table.table`` element except the last, the ``span.abs`` text of the
      third and fourth cells is read as the Jokowi and Prabowo counts.
    * ``url2`` -- for every row of the ``tableRightBottom`` table except the
      last, the last ``td.datas`` cell is read as the target value.

    Every scraped number has its ``.`` characters removed and is parsed as an
    ``int``. Rows from the two pages are paired purely by position, and the
    three lists must have equal length for the DataFrame to be built.

    A separate simple linear regression of the target on each candidate's
    counts is fitted. The coefficient of determination, intercept and slope of
    each model are printed, then the scatter points and both fitted lines are
    shown in one figure.

    Args:
        url2: Address of the page holding the target indicator table.
        ylabel: Name of the indicator; used as y-axis label and in the title.

    Returns:
        None. Results are printed and the figure is shown with ``plt.show()``.

    Note:
        ``np``, ``pd``, ``plt`` and ``LinearRegression`` are expected to be
        imported at module level.
    """
    # import libraries
    import my_function as func

    # --- election results -------------------------------------------------
    url1 = 'https://kawalpemilu.org/#pilpres:0'
    soup1 = func.getData(url1)

    results1 = soup1.find('table', {'class': 'table'})
    rows1 = results1.find_all('tr', {'class': 'row'})
    jokowi = []
    prabowo = []

    for row in rows1[:-1]:
        cells = row.find_all('td')
        # Rows without any cell carry no data.
        if not cells:
            continue
        satu = cells[2].find('span', attrs={'class': 'abs'}).getText()
        dua = cells[3].find('span', attrs={'class': 'abs'}).getText()
        # Strip the dots so the text parses as an integer.
        jokowi.append(int(satu.replace('.', '')))
        prabowo.append(int(dua.replace('.', '')))

    # --- target indicator -------------------------------------------------
    soup2 = func.getData(url2)

    result_value = soup2.find('table', attrs={'id': 'tableRightBottom'})
    rows_value = result_value.find_all('tr')
    target_value = []

    for row in rows_value[:-1]:
        data_value = row.find_all('td', attrs={'class': 'datas'})
        if not data_value:
            continue
        # Only the last value column is used.
        target_value.append(int(data_value[-1].getText().replace('.', '')))

    # --- assemble column vectors -----------------------------------------
    df = pd.DataFrame(
        {'jokowi': jokowi, 'prabowo': prabowo, 'target_value': target_value}
    )
    # scikit-learn expects 2-D inputs, hence the (n, 1) reshape.
    jokowi = df['jokowi'].values.reshape(-1, 1)
    prabowo = df['prabowo'].values.reshape(-1, 1)
    target_value = df['target_value'].values.reshape(-1, 1)

    # --- fit one simple linear regression per candidate -------------------
    reg1 = LinearRegression()
    reg2 = LinearRegression()

    # Evenly spaced x values spanning each candidate's observed range, used
    # to draw the fitted lines.
    prediction_space1 = np.linspace(jokowi.min(), jokowi.max()).reshape(-1, 1)
    prediction_space2 = np.linspace(prabowo.min(), prabowo.max()).reshape(-1, 1)

    reg1.fit(jokowi, target_value)
    reg2.fit(prabowo, target_value)

    r_sq1 = reg1.score(jokowi, target_value)
    print('coefficient of determination Jokowi:', r_sq1)
    print('intercept Jokowi:', reg1.intercept_)
    print('slope Jokowi:', reg1.coef_)

    # Score the Prabowo data with the Prabowo model (reg2), not reg1.
    r_sq2 = reg2.score(prabowo, target_value)
    print('coefficient of determination Prabowo:', r_sq2)
    print('intercept Prabowo:', reg2.intercept_)
    print('slope Prabowo:', reg2.coef_)

    # Compute predictions over the prediction space
    y_pred1 = reg1.predict(prediction_space1)
    y_pred2 = reg2.predict(prediction_space2)

    # --- plot -------------------------------------------------------------
    plt.scatter(jokowi, target_value, color='green', alpha=0.5, label='Jokowi')
    plt.scatter(prabowo, target_value, color='red', alpha=0.5, label='Prabowo')
    plt.plot(prediction_space1, y_pred1, color='green', linewidth=2, label='Jokowi Regression')
    plt.plot(prediction_space2, y_pred2, color='red', linewidth=2, label='Prabowo Regression')
    plt.title('Hasil Pemilu vs '+ylabel)
    plt.xlabel('Hasil Pemilu')
    plt.ylabel(ylabel)
    plt.legend(loc='best')
    plt.show()
예제 #4
0
# Scrape a JavaScript-rendered results table with a headless Chrome session.
# NOTE(review): `url`, `url2`, `func`, `Options`, `webdriver`, `time` and
# `BeautifulSoup` are not defined in this fragment; they must come from
# earlier in the file.

# The path to where you have your chrome webdriver stored:
webdriver_path = 'C:/Program Files (x86)/Google/ChromeDriver/chromedriver.exe'

# Add arguments telling Selenium to not actually open a window
# NOTE(review): Chrome documents the size switch as `--window-size=W,H`;
# confirm the `1920x1080` form is honored.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--window-size=1920x1080')

# Fire up the headless browser
# NOTE(review): `executable_path` is deprecated in Selenium 4 in favor of a
# Service object -- confirm against the Selenium version in use.
browser = webdriver.Chrome(executable_path=webdriver_path,
                           options=chrome_options)

# Load webpage in the browser; the second page is fetched separately through
# the project helper.
browser.get(url)
soup2 = func.getData(url2)

# It can be a good idea to wait for a few seconds before trying to parse the page
# to ensure that the page has loaded completely.
time.sleep(10)

# Parse the rendered HTML, then close the browser.
# NOTE(review): if anything above raises, browser.quit() is never reached.
soup = BeautifulSoup(browser.page_source, 'html.parser')
# `pretty` is not used in the lines visible here; presumably kept for debugging.
pretty = soup.prettify()
browser.quit()
# Find the result rows: every <tr class="row"> of the <table class="table">.
results = soup.find('table', {'class': 'table'})
rows = results.find_all('tr', {'class': 'row'})
# Accumulators filled by code that follows this fragment.
array = []
jokowi = []
예제 #5
0
import numpy as np
import matplotlib.pyplot as plt
import my_function as func
import pandas as pd

# Page to scrape; fetched and parsed by the project helper func.getData
# (presumably returns a BeautifulSoup document -- confirm in my_function).
url1 = 'https://www.bps.go.id/dynamictable/2018/04/16/1298/angka-harapan-hidup-saat-lahir-menurut-provinsi-2010-2018-metode-baru-.html'
soup1 = func.getData(url1)

# The page keeps region names and values in two separate tables:
# 'tableLeftBottom' is read for names, 'tableRightBottom' for values.
result_wilayah = soup1.find('table', attrs={'id': 'tableLeftBottom'})
result_value = soup1.find('table', attrs={'id': 'tableRightBottom'})
rows_wilayah = result_wilayah.find_all('tr')
rows_value = result_value.find_all('tr')

# Filled by code beyond this fragment.
# NOTE(review): the name says "ipm" but the URL is a life-expectancy table --
# confirm the intended naming.
data_ipm = {}

# Walk the name rows (the last one is excluded) and pair each with the value
# row at the same position.
# NOTE(review): `id` shadows the builtin of the same name.
for id, r in enumerate(rows_wilayah[:-1]):
    # Name cells carry id 'th4'; value cells carry class 'datas'.
    data_result = r.find_all('td', attrs={'id': 'th4'})
    data_value = rows_value[id].find_all('td', attrs={'class': 'datas'})
    # Skip rows that have no name cell.
    if len(data_result) == 0:
        continue

    # Region name: text of the <b> element in the first name cell, with
    # newlines removed.
    wilayah = data_result[0].find('b').getText()
    wilayah = wilayah.replace('\n', '')
    # Value: text of the last 'datas' cell in the matching row.
    nilai = data_value[-1].getText()
    # Strip every '.' so the text parses as an int (e.g. '12.34' -> 1234).
    # NOTE(review): if '.' is a decimal separator here, this scales the value
    # rather than truncating it -- confirm that is intended.
    nilai = nilai.replace('.', '')
    # Cast to integer
    nilai = int(nilai)
예제 #6
0
# import libraries
import numpy as np
import my_function as func
import matplotlib.pyplot as plt
# Page to scrape; fetched and parsed by the project helper func.getData
# (presumably returns a BeautifulSoup document -- confirm in my_function).
url1 = 'https://kawalpemilu.org/#pilpres:0'
soup1 = func.getData(url1)

# Result rows: every <tr class="row"> of the <table class="table">.
results1 = soup1.find('table',{'class':'table'})
rows1 = results1.find_all('tr',{'class':'row'})
# Accumulators filled by code that follows this fragment.
list_wilayah1 = []
jokowi1 = []
prabowo1 = []

# Unlike a `[:-1]` slice, this loop visits every row, including the last.
for r in rows1:
    # find all columns per result
    data = r.find_all('td')
    # Skip rows that have no cells.
    if len(data) == 0:
        continue
    # Region name: text of the link in the second cell.
    wilayah = data[1].find('a').getText()
    # Rows for 'KALIMANTAN UTARA' are excluded from processing.
    if wilayah != 'KALIMANTAN UTARA':
        # Counts: text of the 'abs' span in the third and fourth cells.
        satu = data[2].find('span', attrs={'class':'abs'}).getText()
        dua = data[3].find('span', attrs={'class': 'abs'}).getText()
        # Strip every '.' so the text parses as an int.
        satu = satu.replace('.','')
        dua = dua.replace('.','')
        # Cast to integer
        satu = int(satu)