Beispiel #1
0
    # Scrape only when there is no previous fetch timestamp, or when the last
    # fetch is older than global_data.CACHE_MIN minutes (compared against the
    # naive local time returned by datetime.datetime.now()).
    if last_fetched is None or last_fetched < datetime.datetime.now() - datetime.timedelta(minutes=global_data.CACHE_MIN):
        print "*** Scraping, please wait..."

        # Fetch as long as queue is not empty
        # NOTE(review): nothing visible in this fragment de-duplicates URLs, so
        # the loop only ends once scraper.href stops contributing new entries.
        # Confirm that Ghost filters out already-visited links.
        while len(queue) > 0:
            # A fresh Ghost instance is created for every URL.
            scraper = Ghost()

            # Get item from head of list which will be the current url
            # which we will scrape.
            # NOTE(review): if queue is a plain list, pop(0) is O(n) per call.
            current_url = queue.pop(0)

            print "*** Scraping %s" % current_url

            # Scrape it!
            scraper.scrape(current_url)

            # Add our new urls to the queue
            # (presumably scraper.href holds links found on the scraped page --
            # TODO confirm). If queue is a list, `+=` extends it in place, so any
            # other reference to the same list sees the additions too.
            queue += scraper.href

            # Handle only URLs containing "/kurs/", and only when `personal`
            # compares equal to False (so None does not take this branch).
            if "/kurs/" in current_url and personal == False:
                # Debugging
                if global_data.DEBUG:
                    # `%` binds tighter than the conditional expression, so each
                    # statement prints either the formatted "label: value" string
                    # (when the attribute is truthy) or the fallback text alone.
                    print 'Coursecode: %s' % scraper.course_code if scraper.course_code else 'No course code information'
                    print 'Title: %s' % scraper.title if scraper.title else 'No title information'
                    print 'URL: %s' % scraper.url if scraper.url else 'No url information'
                    print 'Courseplan: %s' % scraper.course_plan if scraper.course_plan else 'No course plan information'
                    print 'Ledtext: %s' % scraper.intro_text if scraper.intro_text else 'No intro information'
                    print 'Senaste post titel: %s' % scraper.latest_post_title if scraper.latest_post_title else 'No latest title information'
                    print 'Senaste post skribent: %s' % scraper.latest_post_author if scraper.latest_post_author else 'No latest author information'
                    print 'Senaste post datum: %s' % scraper.latest_post_time if scraper.latest_post_time else 'No latest date information'