Ejemplo n.º 1
0
def _crawl(crawler: Crawler, args: argparse.Namespace) -> int:
    """Crawl using the provided crawler.

    Args:
        crawler: The crawler object.
        args: The command line arguments

    Returns:
        0 on success, else 1
    """
    succeeded = True
    try:
        crawler.crawl()
        _print_dead_links(crawler.dead_links)
    except CrawlerException as exception:
        succeeded = False
        logger.error(str(exception))
    except Exception as exception:
        # Broad catch is deliberate: any unexpected error should still
        # produce a clean, user-facing message.
        succeeded = False
        logger.error("Error occured while crawling")
        # Full traceback only on request, to keep default output clean.
        if args.show_exception_tb:
            logger.exception(exception)

    return 0 if succeeded else 1
Ejemplo n.º 2
0
class ConverterTest(unittest.TestCase):
    """Tests for the crawler used by the converter."""

    def test_crawl(self):
        """Crawler.crawl() should return a list of result dicts."""
        self.crawler = Crawler()
        self.assertIsInstance(self.crawler.crawl(), list)


# Bug fix: the original nested this guard INSIDE the class body, where it
# runs during class creation instead of guarding script execution. It must
# live at module level.
if __name__ == '__main__':
    unittest.main()
Ejemplo n.º 3
0
def start():
    """Crawl all results and print each attribute with its converted price.

    For every result dict produced by ``Crawler.crawl()``, converts the USD
    price via ``Converter.convert`` and prints the attribute next to the
    converted value.
    """
    crawler = Crawler()
    crawler_results = crawler.crawl()
    for crawler_result in crawler_results:
        attribute_string = crawler_result.get('attribute_string')
        attribute_usd_price = crawler_result.get('attribute_usd_price')
        attribute = crawler_result.get('attribute')
        converter = Converter()
        # Bug fix: the original used the Python 2 `print` statement, which is
        # a SyntaxError under Python 3. print() with two args preserves the
        # same space-separated output the Py2 comma form produced.
        print(attribute, converter.convert(attribute_usd_price, attribute_string))
Ejemplo n.º 4
0
import argparse

from src import settings
from src.api import BildungsserverFeed, LocalXmlFeed, LocalRssFeed
from src.crawler import Crawler, SiemensCrawler, BildungsserverCrawler
from src.exceptions import ConfigurationError

if __name__ == '__main__':
    # Select the crawler implementation from configuration. The lowered
    # value is computed once instead of per comparison.
    crawler_name = settings.CRAWLER.lower()
    if crawler_name == 'bildungsserver':
        Crawler = BildungsserverCrawler
    elif crawler_name == 'siemens-stiftung':
        Crawler = SiemensCrawler
    else:
        # Bug fix: the old message claimed CRAWLER "must be set" even when it
        # was set to an unrecognized value; report the actual value instead.
        raise ConfigurationError(
            "settings.CRAWLER must be 'bildungsserver' or 'siemens-stiftung', "
            "got %r." % settings.CRAWLER
        )
    dry_run = settings.DRY_RUN

    crawler = Crawler(dry_run=dry_run)
    crawler.crawl()