Example #1
0
def main():
    """Continuously scrape historical PSI readings and store them in the DB.

    Loops forever: picks the first unprocessed date from the DB, fetches the
    historical-readings page for that date, parses it into a list of rows,
    and writes them back (which also marks the date as processed). When the
    page cannot be fetched, waits 60 seconds and retries the same date.

    Never returns; relies on module-level helpers (initialize_db, get_date,
    get_html, get_datalist, write_list_to_db) and `time`.
    """
    initialize_db()
    met_dept_api_url = "http://www.haze.gov.sg/haze-updates/historical-psi-readings/"

    while True:
        # First date not yet marked as processed in the DB.
        current_url_date = get_date()

        webpage_html = get_html(met_dept_api_url, current_url_date)
        if webpage_html is None:  # fetch failed -- back off and retry
            print("There was an error getting the html files, retrying in 1 minute")
            time.sleep(60)
        else:
            # Successfully fetched the page: parse and persist the readings.
            data_list = get_datalist(webpage_html, current_url_date)
            print("Great Stuff--writing this to the DB")
            # write_list_to_db also marks the date as processed in the DB.
            write_list_to_db(data_list, current_url_date)
            print("Done")
Example #2
0
def main():
    """Continuously scrape PSI readings from several URLs and store them.

    Loops forever: waits for a date to be assigned to this process, fetches
    every URL in the hardcoded list for that date, concatenates the parsed
    rows from each page, and writes the combined list to the DB. When any
    fetch fails, waits 60 seconds and retries the same date.

    Never returns; relies on module-level helpers (initialize_db,
    create_url_list, get_date, get_html, get_datalist, write_list_to_db)
    and `time`.
    """
    initialize_db()
    # URL list is hardcoded in html_operations.py (see create_url_list).
    met_dept_api_url_list = create_url_list()

    while True:
        # Polls every 2 seconds until a date is assigned to this process id.
        current_url_date = get_date()
        webpage_htmls = get_html(met_dept_api_url_list, current_url_date)

        if webpage_htmls is None:  # at least one fetch failed -- back off
            print("There was an error getting the html files, retrying in 1 minute")
            time.sleep(60)
        else:
            # Successfully fetched all pages: parse each and collect the rows.
            data_list = []
            for html_output in webpage_htmls:
                data_list.extend(get_datalist(html_output, current_url_date))
            print("Great Stuff--writing this to the DB")
            write_list_to_db(data_list, current_url_date)
Example #3
0
def get_api_readings():
    """Continuously scrape historical PSI readings, logging progress.

    Loops forever: picks the first unprocessed date from the DB, fetches the
    historical-readings page for that date, parses it, and writes the rows
    back (which also marks the date as processed). When the page cannot be
    fetched, waits 30 seconds and retries the same date.

    Never returns; relies on module-level helpers (initialize_db, get_date,
    get_html, get_datalist, write_list_to_db), `logging`, and `time`.
    """
    initialize_db()
    met_dept_api_url = "http://www.haze.gov.sg/haze-updates/historical-psi-readings/"

    while True:
        # First date not yet marked as processed in the DB.
        current_url_date = get_date()
        # Lazy %-args: let logging format only if the record is emitted.
        logging.info("Getting data for %s", current_url_date)

        webpage_html = get_html(met_dept_api_url, current_url_date)
        if webpage_html is None:  # fetch failed -- back off and retry
            logging.info("There was an error getting the html files, retrying in 30 seconds")
            print("Retrying in 30 seconds")
            time.sleep(30)
        else:
            # Successfully fetched the page: parse and persist the readings.
            data_list = get_datalist(webpage_html, current_url_date)
            logging.info("Great Stuff--writing this to the DB")
            # write_list_to_db also marks the date as processed in the DB.
            write_list_to_db(data_list, current_url_date)
            logging.info("Done")