# creating new folder real-data which will contain cleaned data in csv files. if not os.path.exists("Data/Real-Data"): os.makedirs("Data/Real-Data") for year in range(2013, 2017): final_data = [] with open('Data/Real-Data/real_' + str(year) + '.csv', 'w') as csvfile: wr = csv.writer(csvfile, dialect='excel') # first row wr.writerow(['T', 'TM', 'Tm', 'SLP', 'H', 'W', 'V', 'VM', 'PM2.5']) # data added to final_data monthwise for month in range(1, 13): temp = met_data(month, year) final_data = final_data + temp # dependent feature pm = avg_data(year) if len(pm) == 364: pm.insert(364, '-') # combing independent features from final_data & dependent features from pm to form a complete dataset for i in range(len(final_data) - 1): final_data[i].insert(8, pm[i]) # storing complete dataset for a year in csv file & cleanind the data with open('Data/Real-Data/real_' + str(year) + '.csv', 'a') as csvfile: wr = csv.writer(csvfile, dialect='excel') for row in final_data: flag = 0 for elem in row: if elem == "" or elem == '-':
# -*- coding: utf-8 -*- """ Created on Sat May 23 14:55:05 2020 @author: stak """ import requests import sys import pandas as pd from bs4 import BeautifulSoup from Plot_AQI import avg_data import os as os import csv AQI_List = avg_data() def met_data(month, year): file_html = open('Data/Html_Data/{}/{}.html'.format(year, month), 'rb') plain_text = file_html.read() tempD = [] finalD = [] soup = BeautifulSoup(plain_text, "lxml") for table in soup.findAll('table', {'class': 'medias mensuales numspan'}): for tbody in table: for tr in tbody: a = tr.get_text()
if __name__ == "__main__":
    # For every year, combine the monthly rows returned by met_data with the
    # values returned by avg_data and write them to Data/Real-Data.
    # exist_ok=True replaces the separate os.path.exists() check and avoids
    # the check-then-create race.
    os.makedirs("Data/Real-Data", exist_ok=True)

    for year in range(2013, 2017):
        # Independent features: append the rows of all twelve months.
        yeardata = []
        for month in range(1, 13):
            yeardata.extend(met_data(month, year))

        # Get dependent feature from avg_data for this year's AQI csv path.
        lst = avg_data('Data/AQI/aqi{}.csv'.format(year))

        # Combine dependent and independent features: every row except the
        # last one gets the value with the same index inserted at position 8.
        for index, row in enumerate(yeardata[:-1]):
            row.insert(8, lst[index])

        # newline="" is required by the csv module for files handed to
        # csv.writer; without it, platforms that translate "\n" to "\r\n"
        # produce an extra blank line after every row.
        with open("Data/Real-Data/real_{}.csv".format(year), "w",
                  newline="") as csvfile:
            wr = csv.writer(csvfile, dialect='excel')
            wr.writerow(
                ['T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'V', 'VM', 'PM 2.5'])
            for row in yeardata:
                # flag is set to 1 when any cell of the row is '' or '-'.
                flag = 0
                for cell in row:
                    if cell in ('', '-'):
                        flag = 1
                # NOTE(review): the visible source ends here; how flag is
                # used afterwards is not shown in this excerpt.
# NOTE(review): this excerpt starts mid-script; any guard that preceded the
# makedirs call is not visible. exist_ok=True keeps the call from raising
# FileExistsError when the folder is already there.
os.makedirs("Data/Real-Data", exist_ok=True)

for year in range(2014, 2017):  # iterating through each year
    final_data = []  # collects the rows returned by met_data for the year
    csv_path = 'Data/Real-Data/real_' + str(year) + '.csv'

    # Mode 'w' creates the file if it does not exist (and truncates it if it
    # does); only the row of column names is written here.
    # newline='' is required by the csv module for files given to csv.writer,
    # otherwise "\r\n" platforms get a blank line after every row.
    with open(csv_path, 'w', newline='') as csvfile:
        wr = csv.writer(csvfile, dialect='excel')
        wr.writerow(
            ['T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'V', 'VM', 'PM 2.5'])

    for month in range(1, 13):
        final_data.extend(met_data(month, year))
    # final_data contains the whole year's data at the end of the loop.

    pm = avg_data(year)  # values from avg_data for that year
    if len(pm) == 364:
        # Same effect as pm.insert(364, '-') on a 364-element list: add a
        # '-' placeholder as the 365th entry.
        pm.append('-')

    # Insert the pm value at position 8 of every row except the last one.
    for i, row in enumerate(final_data[:-1]):
        row.insert(8, pm[i])

    with open(csv_path, 'a', newline='') as csvfile:
        wr = csv.writer(csvfile, dialect='excel')
        for row in final_data:
            # flag is set to 1 when any element of the row is "" or "-".
            flag = 0
            for elem in row:
                if elem == "" or elem == "-":
                    flag = 1
            # NOTE(review): the visible source ends here; how flag is used
            # afterwards is not shown in this excerpt.