def get_long_url(url, response):
    """
    Extract the destination URL(s) of a shortened URL.

    Resets ``response.url`` to the passed URL and, when the response was
    fetched successfully, looks for a filter (loaded from the database) whose
    ``service_id`` matches either ``response.group_marker`` or the network
    location of the URL. The matching filter expression is evaluated and its
    result is stored on the response object.

    Parameters:
        url: The URL as read from the user input. It overwrites
            ``response.url``, which may have been modified while obtaining
            the source.
        response: The 'Response' object of the request. The attributes
            ``success``, ``source``, ``group_marker``, ``long_url`` and
            ``long_urls`` are read; ``url``, ``long_url``/``long_urls`` and
            ``error_msg`` are written.

    Returns:
        The passed 'Response' object. It is returned untouched (apart from
        ``url``) when ``response.success`` is false. Otherwise it holds
        ``long_url`` (single destination) or ``long_urls`` (multiple
        destinations) when a destination was found.

    Raises:
        AttributeError: When the matching filter yields nothing or no
            destination was stored on the response.
        IOError: Propagated unchanged from the lookup.

        In both cases ``response.error_msg`` is set before the exception is
        re-raised.
    """
    # When obtaining the source of an URL it's often required to modify the
    # URL to get the desired source, so reset it to the URL that was read
    # from the user input.
    response.url = url

    if not response.success:
        return response

    # Parse the source and URL.
    # NOTE: these locals must keep their names. ``eval`` below runs in this
    # function's scope, so the filter expressions presumably reference names
    # such as ``soup``, ``parsed_url``, ``netloc`` or ``response``.
    soup = BeautifulSoup(response.source)
    parsed_url = urlparse(response.url)
    netloc = parsed_url.netloc

    try:
        filters = load_filters(database_path)

        # Find the correct filter for the passed response object and/or URL
        for filter_object in filters:
            # The filter applies when its identifier matches the group
            # marker of the response or the network location of the URL.
            if (response.group_marker == filter_object.service_id
                    or filter_object.service_id == netloc):
                # Remove the group tag from the response object (we no
                # longer need it)
                if response.group_marker:
                    del response.group_marker

                # Execute the filter for the specific service and store the
                # result in response.long_url(s).
                # SECURITY: ``eval`` executes arbitrary Python taken from the
                # database; the database must never contain untrusted data.
                if filter_object.is_multi:
                    long_urls = eval(filter_object.filter)
                    if not long_urls:
                        raise AttributeError
                    response.long_urls = long_urls
                else:
                    long_url = eval(filter_object.filter)
                    if not long_url:
                        raise AttributeError
                    response.long_url = long_url
                break

        # And finally, when nothing could be found, raise an error so the
        # error message gets set below.
        if not response.long_url and not response.long_urls:
            raise AttributeError
    except (IOError, AttributeError):
        # Store the message *before* re-raising; previously the assignment
        # followed the ``raise`` and could never run.
        # NOTE(review): the exception is still propagated so existing callers
        # keep seeing the same exception types - confirm whether swallowing
        # it and returning the response was the original intent.
        response.error_msg = "Failed to extract the destination URL!"
        raise

    return response
# Pick the location of the filter database from the name this module was
# loaded under; the relative path differs between being imported as part of
# the ``modules`` package and being run/imported directly.
if __name__ == "modules.routines":
    database_path = "data/data.sqlite"
elif __name__ in ("__main__", "routines"):
    database_path = "../data/data.sqlite"
else:
    # Loaded under a name for which no database location is known.
    raise ValueError("Database not found!")

# Without the database file there are no filters to work with.
if not path.isfile(database_path):
    raise ValueError("Database not found, no filters loaded!")

# Only the direct run/import case loads the filters right away (the result
# is discarded here); the package case defers loading to the callers.
if __name__ != "modules.routines":
    load_filters(database_path)