def process(self, index_url='http://www.zonadivas.com/principal.html',
            repository_root='c:\\temp\\'):
    """Download the index page and persist every profile not yet stored.

    The HTML fetched from ``index_url`` is handed to
    ``self.extract_page_links``, whose result is iterated with ``.items()``
    as ``name -> url`` pairs. Spaces are stripped from each name before it
    is used as the key for the repository lookup and for persisting.

    Args:
        index_url: Address of the page listing the profiles. Defaults to
            the URL that used to be hard-coded here.
        repository_root: Location passed to ``CompoundRepository``.
            Defaults to the path that used to be hard-coded here.
    """
    html = self.get_html(index_url)
    page_links = self.extract_page_links(html)
    repository = CompoundRepository(repository_root)

    for name, url in page_links.items():
        print('processing profile: ' + url)

        # The repository is keyed by the name with all spaces removed.
        name = name.replace(' ', '')

        # Skip profiles the repository already holds.
        if repository.profile_exist(name):
            print('profile [' + name + '] has no changes')
            continue

        # extract data
        processor = PageAnalyzer()
        profile, photo_urls = processor.analyze(url)

        # persist to FS
        persister = ProfilePersister(repository)
        persister.persist(name, profile, photo_urls)

        print('profile processed: ' + url)
def run(self):
    """Analyze the page at ``self.url`` and persist what was extracted.

    A progress line is printed before and after the work. The profile and
    photo URLs returned by ``PageAnalyzer.analyze`` are handed, together
    with ``self.name``, to a ``ProfilePersister`` built on
    ``self.repository``.
    """
    print('processing profile: ' + self.url)

    # Extraction step: a fresh analyzer yields the profile and its photos.
    profile, photo_urls = PageAnalyzer().analyze(self.url)

    # Persistence step: store the extracted data under this worker's name.
    writer = ProfilePersister(self.repository)
    writer.persist(self.name, profile, photo_urls)

    print('profile processed: ' + self.url)