Example #1
0
    def __init__(self, rel_dir="../FourmiCrawler/sources"):
        """
        The initiation of a SourceLoader, selects and indexes a directory for usable sources.
        Also loads a configuration file for Sources and passes the arguments in
        the named section to the source
        :param rel_dir: A relative path to a directory.
        """
        path = os.path.dirname(os.path.abspath(__file__))
        path += "/" + rel_dir
        known_parser = set()

        config = Configurator.read_sourceconfiguration()

        for py in [
                f[:-3] for f in os.listdir(path)
                if f.endswith('.py') and f != '__init__.py'
        ]:
            mod = __import__('.'.join(
                [rel_dir.replace("../", "").replace("/", "."), py]),
                             fromlist=[py])
            classes = [
                getattr(mod, x) for x in dir(mod)
                if inspect.isclass(getattr(mod, x))
            ]
            for cls in classes:
                if issubclass(cls, Source) and cls not in known_parser:
                    sourcecfg = Configurator.get_section(config, cls.__name__)
                    self.sources.append(cls(sourcecfg))
                    known_parser.add(cls)
Example #2
0
    def __init__(self, rel_dir="../FourmiCrawler/sources"):
        """
        The initiation of a SourceLoader, selects and indexes a directory for usable sources.
        Also loads a configuration file for Sources and passes the arguments in
        the named section to the source
        :param rel_dir: A relative path to a directory.
        """
        path = os.path.dirname(os.path.abspath(__file__))
        path += "/" + rel_dir
        known_parser = set()

        config = Configurator.read_sourceconfiguration()

        for py in [f[:-3] for f in os.listdir(path) if f.endswith(".py") and f != "__init__.py"]:
            mod = __import__(".".join([rel_dir.replace("../", "").replace("/", "."), py]), fromlist=[py])
            classes = [getattr(mod, x) for x in dir(mod) if inspect.isclass(getattr(mod, x))]
            for cls in classes:
                if issubclass(cls, Source) and cls not in known_parser:
                    sourcecfg = Configurator.get_section(config, cls.__name__)
                    self.sources.append(cls(sourcecfg))
                    known_parser.add(cls)
Example #3
0
class TestConfigurator(unittest.TestCase):
    def setUp(self):
        self.conf = Configurator()

    def test_set_output(self):
        self.conf.set_output(filename="test.txt",
                             fileformat="csv",
                             compound="test")
        self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "test.txt")
        self.assertEqual(self.conf.scrapy_settings["FEED_FORMAT"], "csv")

        self.conf.set_output("<compound>.*format*", "jsonlines", "test")
        self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "test.json")
        self.assertEqual(self.conf.scrapy_settings["FEED_FORMAT"], "jsonlines")

        self.conf.set_output("<compound>.*format*", "csv", "test")
        self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "test.csv")
        self.assertEqual(self.conf.scrapy_settings["FEED_FORMAT"], "csv")

    def test_start_log(self):
        for i in range(0, 3):
            self.conf.set_logging("TEST", i)
            self.assertEqual(self.conf.scrapy_settings.get("LOG_FILE"), "TEST")
            if i > 0:
                self.assertEqual(self.conf.scrapy_settings.get("LOG_ENABLED"),
                                 True)
                if i > 1:
                    self.assertEqual(
                        self.conf.scrapy_settings.get("LOG_STDOUT"), False)
                else:
                    self.assertEqual(
                        self.conf.scrapy_settings.get("LOG_STDOUT"), True)
            else:
                self.assertEqual(self.conf.scrapy_settings.get("LOG_ENABLED"),
                                 False)
                self.assertEqual(self.conf.scrapy_settings.get("LOG_STDOUT"),
                                 True)
            if i == 1:
                self.assertEqual(self.conf.scrapy_settings.get("LOG_LEVEL"),
                                 "WARNING")
            elif i == 2:
                self.assertEqual(self.conf.scrapy_settings.get("LOG_LEVEL"),
                                 "INFO")
            elif i == 3:
                self.assertEqual(self.conf.scrapy_settings.get("LOG_LEVEL"),
                                 "DEBUG")

            self.conf.set_logging(verbose=i)
            self.assertEqual(self.conf.scrapy_settings.get("LOG_FILE"), None)

    def test_read_sourceconfiguration(self):
        config = self.conf.read_sourceconfiguration()
        self.assertIsInstance(config, ConfigParser.ConfigParser)

    def test_get_section(self):
        config = ConfigParser.ConfigParser()
        section = self.conf.get_section(config, 'test')
        self.assertIn('reliability', section)
        self.assertEquals(section['reliability'], '')

        config.set('DEFAULT', 'reliability', 'Low')

        section = self.conf.get_section(config, 'test')
        self.assertEquals(section['reliability'], 'Low')

        config.add_section('test')
        config.set('test', 'var', 'Maybe')

        section = self.conf.get_section(config, 'test')
        self.assertEquals(section['reliability'], 'Low')
        self.assertEqual(section['var'], 'Maybe')
Example #4
0
class TestConfigurator(unittest.TestCase):

    def setUp(self):
        self.conf = Configurator()

    def test_set_output(self):
        self.conf.set_output(filename="test.txt", fileformat="csv", compound="test")
        self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "test.txt")
        self.assertEqual(self.conf.scrapy_settings["FEED_FORMAT"], "csv")

        self.conf.set_output("<compound>.*format*", "jsonlines", "test")
        self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "test.json")
        self.assertEqual(self.conf.scrapy_settings["FEED_FORMAT"], "jsonlines")

        self.conf.set_output("<compound>.*format*", "csv", "test")
        self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "test.csv")
        self.assertEqual(self.conf.scrapy_settings["FEED_FORMAT"], "csv")

    def test_start_log(self):
        for i in range(0, 3):
            self.conf.set_logging("TEST", i)
            self.assertEqual(self.conf.scrapy_settings.get("LOG_FILE"), "TEST")
            if i > 0:
                self.assertEqual(self.conf.scrapy_settings.get("LOG_ENABLED"), True)
                if i > 1:
                    self.assertEqual(self.conf.scrapy_settings.get("LOG_STDOUT"), False)
                else:
                    self.assertEqual(self.conf.scrapy_settings.get("LOG_STDOUT"), True)
            else:
                self.assertEqual(self.conf.scrapy_settings.get("LOG_ENABLED"), False)
                self.assertEqual(self.conf.scrapy_settings.get("LOG_STDOUT"), True)
            if i == 1:
                self.assertEqual(self.conf.scrapy_settings.get("LOG_LEVEL"), "WARNING")
            elif i == 2:
                self.assertEqual(self.conf.scrapy_settings.get("LOG_LEVEL"), "INFO")
            elif i == 3:
                self.assertEqual(self.conf.scrapy_settings.get("LOG_LEVEL"), "DEBUG")

            self.conf.set_logging(verbose=i)
            self.assertEqual(self.conf.scrapy_settings.get("LOG_FILE"), None)

    def test_read_sourceconfiguration(self):
        config = self.conf.read_sourceconfiguration()
        self.assertIsInstance(config, ConfigParser.ConfigParser)

    def test_get_section(self):
        config = ConfigParser.ConfigParser()
        section = self.conf.get_section(config, 'test')
        self.assertIn('reliability', section)
        self.assertEquals(section['reliability'], '')

        config.set('DEFAULT', 'reliability', 'Low')

        section = self.conf.get_section(config, 'test')
        self.assertEquals(section['reliability'], 'Low')

        config.add_section('test')
        config.set('test', 'var', 'Maybe')

        section = self.conf.get_section(config, 'test')
        self.assertEquals(section['reliability'], 'Low')
        self.assertEqual(section['var'], 'Maybe')