def test_URL(self, *args, **kwargs): """ Retrieves the full URL from the current instance. Uses the urllib2 library to load the page. If the page loads, sets the is_url_ok flag to true. Uses the results of the page load to see if there was a redirect (status of 301 or 302). If so, parses and stores information on the final URL that was loaded in the redirect_* fields in this object. Returns a status message that starts with the OK flag, then describes whether a redirect occurred, and if so, where the redirect went. Preconditions: - must have loaded data into this instance from a row in the database. """ # return reference status_OUT = "" # declare variables me = "test_URL" http_helper = None url_to_test = "" redirect_url = "" redirect_status_list = "" redirect_status_count = -1 last_redirect_status = -1 exception_helper = None exception_message = "" exception_status = "" # create HTTP Helper http_helper = Http_Helper() # get URL. url_to_test = self.full_url # got a full URL? if (not url_to_test) or (url_to_test == None) or (url_to_test == ""): # no. Make one out of domain. url_to_test = "http://" + self.domain_name # got a path? if (self.domain_path) and (self.domain_path != None) and (self.domain_path != ""): # yes. Append it, as well. url_to_test += self.domain_path # -- END check to see if domain path. --# # -- END check to see if URL --# # now, see if we have a URL again. if (url_to_test) and (url_to_test != None) and (url_to_test != ""): # try/except to capture problems with URL not resolving at all. try: # we have a URL. Use the HTTP helper to test. redirect_url = http_helper.get_redirect_url_mechanize(url_to_test) # see if there is a status code. redirect_status_list = http_helper.redirect_status_list redirect_status_count = len(redirect_status_list) if redirect_status_count > 0: # yes. Get latest one (use pop()). last_redirect_status = redirect_status_list.pop() # -- END check to see if any statuses --# # got anything back? if (redirect_url) and (redirect_url != None) and (redirect_url != ""): # yes. Update the record. # URL is OK. self.is_url_ok = True # store HTTP status code. self.redirect_status = last_redirect_status # store full URL. self.redirect_full_url = redirect_url # parse and store components of URL. self.parse_and_store_URL(URL_IN=redirect_url, is_redirect_IN=True) # set status to Success! status_OUT = self.STATUS_SUCCESS else: # no. No exception, not redirect. Just a normal URL. self.is_url_ok = True # set status status_OUT = "Attempt to find redirect returned no URL. Test URL = " + url_to_test # got a status? if (last_redirect_status) and (last_redirect_status > 0): # there is one. Append it to message. status_OUT += "; HTTP status code: " + str(last_redirect_status) # -- END check to see if HTTP status code --# # -- END check to see if redirect URL --# except Exception as e: # likely URL not found. URL is not OK, do nothing else. self.is_url_ok = False # URLError (and child HTTPError) will have a "reason". if hasattr(e, "reason"): # yes. Store it in the redirect_message field. self.redirect_message = e.reason # -- END check to see if "reason" --# # HTTPError will have an HTTP status "code". if hasattr(e, "code"): # yes, store it in the redirect_status field. self.redirect_status = e.code # -- END check to see if there is a code. # make exception helper class. exception_helper = ExceptionHelper() # process the exception exception_message = "ERROR - Exception caught in " + me + " trying to resolve URL " + url_to_test + "." exception_status = exception_helper.process_exception( exception_IN=e, message_IN=exception_message, print_details_IN=False ) # set status to description of exception status_OUT = exception_helper.last_exception_details # -- END try/except to deal with unknown domains/URLs. --# # save the results. self.save() else: status_OUT = "ERROR - Could not find a URL to test for this record." # -- END check to make sure we have a URL --# return status_OUT
#-- END DEBUG --# #-- END check to see if URL element present. --# # Exception processing XML except Exception as e: # likely no ID attribute in empty station element, returned when # query has no matches. Print XML, error. print( "- Error - exception caught processing XML." ) # increment error counter error_counter += 1 # output Exception details. my_exception_helper.process_exception( e ) # output XML print( npr_bs.prettify() ) #-- END try/except around looking for station ID --# #-- END loop over stations --# #-- END loop over cities from postal code table --# # a little overview end_dt = datetime.datetime.now() print( "==> Started at " + str( start_dt ) ) print( "==> Finished at " + str( end_dt ) ) print( "==> Duration: " + str( end_dt - start_dt ) )