Beispiel #1
0
class BaseScraper(object):
    """Basic framework of a simple web communication tool.

    Instance attributes:
        url      -- value returned by ``parse_url`` for the target address.
        response -- object returned by the last ``requests.get`` call, or
                    None before any request has been made.
        soup     -- ``BS`` object built from ``response.text``, or None
                    until a response has been loaded.
        parser   -- a ``ParsingManager`` instance.
        history  -- a ``HistoryManager`` instance; ``get`` saves the scraper
                    to it after every request.
    """

    def __init__(self, url):
        """Store the target url and start with no response and no soup."""
        self.set_url(url)
        self.response = None
        self.soup = None
        self.parser = ParsingManager()
        self.history = HistoryManager()

    # Getters / Setters / Loaders
    def set_url(self, url):
        """Parse ``url`` and store the result on ``self.url``.

        Anything that is not already a plain ``str`` is converted with
        ``str()`` first; ``str()`` of a plain ``str`` is the same string, so
        a single code path covers both cases.
        """
        self.url = parse_url(str(url))  # Returns a EasyUrl() object

    def get_status_code(self):
        """Return the status code of the last made request.

        Returns None when no request has been made yet.
        """
        # Compare against None explicitly: a requests Response evaluates as
        # False when its status is an error (4xx/5xx), so a plain truth test
        # would hide exactly the status codes a caller most wants to see.
        if self.response is None:
            return None
        return self.response.status_code

    def load_soup(self):
        """Create a soup object from ``self.response``.

        Does nothing when no response is available. An error response is
        still parsed, so ``self.soup`` always mirrors the latest response
        rather than a stale earlier page.
        """
        if self.response is not None:
            self.soup = BS(self.response.text)

    # Web Communication Methods
    def get(self, payload=None):
        """Issue a GET request to ``self.url`` and record the result.

        Args:
            payload: optional mapping sent as the query parameters
                (``params``) of the request. Defaults to no parameters.

        Side effects: sets ``self.response``, rebuilds ``self.soup`` and
        saves this scraper to ``self.history``.
        """
        # A None sentinel replaces the former shared ``{}`` default so that
        # no mutable object is reused between calls.
        params = {} if payload is None else payload
        self.response = requests.get(self.url, params=params)
        self.load_soup()
        self.history.save(self)  # Save the state of the passed scraper
Beispiel #2
0
 def __init__(self, url):
     """Set the target url and initialise the scraper's working state.

     ``response`` and ``soup`` start out as None; ``parser`` and ``history``
     receive fresh ``ParsingManager`` / ``HistoryManager`` instances.
     """
     self.set_url(url)
     # Nothing has been fetched or parsed yet.
     self.response = self.soup = None
     # Helper objects, created in the same order as before.
     self.parser = ParsingManager()
     self.history = HistoryManager()