Esempio n. 1
0
    def process(self):
        """Scan the main listing page and persist every profile not yet stored.

        Downloads the listing HTML through ``self.get_html``, asks
        ``self.extract_page_links`` for the profile links and then, for each
        link, analyzes the profile page and persists the result unless the
        repository already reports a profile under the same name.

        Progress is reported on stdout via ``print``. Nothing is returned.
        """
        html = self.get_html('http://www.zonadivas.com/principal.html')

        # NOTE(review): the result is consumed through ``.items()`` as
        # (name, url) pairs -- presumably a dict; confirm against
        # ``extract_page_links``.
        page_links = self.extract_page_links(html)

        # NOTE(review): hard-coded Windows path handed to the repository;
        # presumably its storage root -- confirm and consider making it
        # configurable.
        repository = CompoundRepository('c:\\temp\\')

        for raw_name, url in page_links.items():
            print('processing profile: ' + url)

            # Profiles are looked up and stored under the name with all
            # space characters removed.
            name = raw_name.replace(' ', '')

            # Only existence is checked here; the "no changes" wording is
            # not backed by any content comparison in this method.
            if repository.profile_exist(name):
                print('profile [' + name + '] has no changes')
                continue

            # extract data
            processor = PageAnalyzer()
            profile, photo_urls = processor.analyze(url)

            # persist to FS
            persister = ProfilePersister(repository)
            persister.persist(name, profile, photo_urls)

            print('profile processed: ' + url)
    def run(self):
        """Analyze the profile page at ``self.url`` and persist the result.

        The page is analyzed with a fresh ``PageAnalyzer``; the resulting
        profile and photo URLs are then stored under ``self.name`` by a
        ``ProfilePersister`` built on ``self.repository``. Start and finish
        are announced on stdout.
        """
        print('processing profile: ' + self.url)

        # Extraction step: analyze() yields a (profile, photo_urls) pair.
        extracted_profile, extracted_photos = PageAnalyzer().analyze(self.url)

        # Persistence step.
        ProfilePersister(self.repository).persist(
            self.name, extracted_profile, extracted_photos
        )

        print('profile processed: ' + self.url)