Exemple #1
0
    # Create the Data/Real-Data output folder if it does not exist yet; the
    # per-year CSV files are written into it.
    if not os.path.exists("Data/Real-Data"):
        os.makedirs("Data/Real-Data")
    # One output file per year, 2013 through 2016 (the range end is exclusive).
    for year in range(2013, 2017):
        final_data = []
        # Mode 'w' creates the file or truncates an existing one, so after this
        # block the file holds only the header row.
        with open('Data/Real-Data/real_' + str(year) + '.csv', 'w') as csvfile:
            wr = csv.writer(csvfile, dialect='excel')
            # Header row: nine column names, 'PM2.5' being the last one.
            wr.writerow(['T', 'TM', 'Tm', 'SLP', 'H', 'W', 'V', 'VM', 'PM2.5'])
        # Append the rows returned by met_data() month by month, so final_data
        # ends up holding the rows of all twelve months in order.
        for month in range(1, 13):
            temp = met_data(month, year)
            final_data = final_data + temp

        # Dependent feature: the values returned by avg_data() for this year.
        pm = avg_data(year)

        # A list of exactly 364 values gets a '-' placeholder appended.
        # NOTE(review): presumably this pads the list to a 365-day year - confirm.
        if len(pm) == 364:
            pm.insert(364, '-')

        # Combine the independent features (final_data) with the dependent
        # feature (pm) by inserting pm[i] at index 8 of row i.
        # NOTE(review): the range stops at len(final_data) - 1, so the last row
        # never receives a pm value - confirm this is intended.
        for i in range(len(final_data) - 1):
            final_data[i].insert(8, pm[i])

        # storing complete dataset for a year in csv file & cleaning the data
        with open('Data/Real-Data/real_' + str(year) + '.csv', 'a') as csvfile:
            wr = csv.writer(csvfile, dialect='excel')
            for row in final_data:
                flag = 0
                for elem in row:
                    if elem == "" or elem == '-':
# -*- coding: utf-8 -*-
"""
Created on Sat May 23 14:55:05 2020

@author: stak
"""

import requests
import sys
import pandas as pd
from bs4 import BeautifulSoup
from Plot_AQI import avg_data
import os as os
import csv

# Evaluated once at import time: avg_data (imported from Plot_AQI) is called
# with no arguments and its result is kept in AQI_List.
AQI_List = avg_data()


def met_data(month, year):
    """Load the saved HTML page for ``month``/``year`` and walk its data table.

    The page is read from ``Data/Html_Data/<year>/<month>.html`` and parsed
    with BeautifulSoup using the ``lxml`` parser.

    NOTE(review): the return value is not established by the statements
    below - confirm what callers receive before relying on it.
    """

    # Opened in binary mode; the raw bytes are handed to BeautifulSoup as-is.
    # NOTE(review): this handle is never closed here - consider a `with` block.
    file_html = open('Data/Html_Data/{}/{}.html'.format(year, month), 'rb')
    plain_text = file_html.read()

    # Accumulators; neither is filled by the statements below.
    tempD = []
    finalD = []

    soup = BeautifulSoup(plain_text, "lxml")
    # Visit every <table> whose class attribute is 'medias mensuales numspan'.
    for table in soup.findAll('table', {'class': 'medias mensuales numspan'}):
        # Iterating a tag yields its children; the names suggest table
        # sections and then rows - TODO confirm against the saved pages.
        for tbody in table:
            for tr in tbody:
                # Text content of the current child element.
                a = tr.get_text()
Exemple #3
0

# Script entry point: build one combined CSV per year under Data/Real-Data.
if __name__ == "__main__":

    # Make sure the output folder exists before any file is written.
    if not os.path.exists("Data/Real-Data"):
        os.makedirs("Data/Real-Data")

    # Years 2013 through 2016 (the range end is exclusive).
    for year in range(2013, 2017):
        yeardata = []

        # Concatenate the rows returned by met_data() for each of the 12 months.
        for month in range(1, 13):
            finalD = met_data(month, year)
            yeardata = yeardata + finalD

        # Get the dependent feature from this year's AQI CSV file.
        lst = avg_data('Data/AQI/aqi{}.csv'.format(year))

        # Combine dependent and independent features: lst[i] is inserted at
        # index 8 of row i.
        # NOTE(review): the range stops at len(yeardata) - 1, so the last row
        # never receives a value - confirm this is intended.
        for element in range(len(yeardata) - 1):
            yeardata[element].insert(8, lst[element])

        # Mode "w" creates the file or truncates an existing one.
        # NOTE(review): the csv module documentation recommends opening the
        # file with newline='' - confirm whether that matters on the target OS.
        with open("Data/Real-Data/real_{}.csv".format(year), "w") as csvfile:
            wr = csv.writer(csvfile, dialect='excel')
            # Header row: nine column names, 'PM 2.5' being the last one.
            wr.writerow(
                ['T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'V', 'VM', 'PM 2.5'])

            for row in yeardata:
                # flag becomes 1 when any cell of the row is empty ('') or
                # holds the '-' placeholder.
                flag = 0
                for element in row:
                    if element in ('', '-'):
                        flag = 1
        os.makedirs("Data/Real-Data")
    for year in range(2014, 2017):  # iterate over 2014, 2015 and 2016
        final_data = []  # accumulates this year's rows returned by met_data()
        with open(
                'Data/Real-Data/real_' + str(year) + '.csv', 'w'
        ) as csvfile:  # mode 'w' creates the file, or truncates an existing one

            wr = csv.writer(csvfile, dialect='excel')
            wr.writerow(
                ['T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'V', 'VM',
                 'PM 2.5'])  # header row with the column names
        for month in range(1, 13):
            temp = met_data(month, year)
            final_data = final_data + temp  # final_data holds the whole year's rows once the loop ends

        pm = avg_data(year)  # values returned by avg_data() for that year (average PM 2.5 per the original note)

        # A list of exactly 364 values gets a '-' placeholder appended.
        # NOTE(review): presumably this pads the list to a 365-day year - confirm.
        if len(pm) == 364:
            pm.insert(364, '-')

        # NOTE(review): the range stops at len(final_data) - 1, so the last row
        # never receives a pm value - confirm this is intended.
        for i in range(len(final_data) - 1):
            # final[i].insert(0, i + 1)
            final_data[i].insert(8, pm[i])  # insert the pm value at index 8 of row i

        # Reopen the same file in append mode so rows go after the header.
        with open('Data/Real-Data/real_' + str(year) + '.csv', 'a') as csvfile:
            wr = csv.writer(csvfile, dialect='excel')
            for row in final_data:
                # flag becomes 1 when any cell of the row is empty ("") or
                # holds the "-" placeholder.
                flag = 0
                for elem in row:
                    if elem == "" or elem == "-":
                        flag = 1