Beispiel #1
0
def main(*argv):
    """
    = Configuration import =

    Read a configuration file and store the crawl configuration it contains
    on the matching ``Host`` documents, creating a ``Host`` when none exists
    for a host name.

    Run command: python -m toddler.tools.configimport

    Check "--help" for the available options.

    :param argv: optional command line arguments; when empty, ``setup`` is
        called without an explicit argument list
    :return: None
    """
    # The first positional parameter of ``ArgumentParser`` is ``prog`` (the
    # program name shown in usage/help), so the argument list must not be
    # passed there; it is handed to ``setup`` below instead.
    parser = argparse.ArgumentParser(
        description="ConfigImport v{}".format(__version__)
    )

    parser.add_argument("-t", "--type", help="Config type", choices=["crawl"])

    if argv:
        args = setup(argv, argument_parser=parser, do_not_parse_config=True)
    else:
        # NOTE(review): presumably ``setup`` falls back to the process
        # arguments when no list is given -- confirm against ``setup``.
        args = setup(argument_parser=parser, do_not_parse_config=True)

    print(Style.DIM + Fore.BLUE + "ConfigImport v{}".format(__version__))

    # NOTE(review): ``args.config`` is not declared on this parser; it is
    # assumed to be provided by ``setup`` -- confirm.
    with open(args.config) as config_file:
        print(Fore.BLUE + "Opened file:" + Fore.RESET + " {}".format(args.config) + Fore.RESET)
        # Only the "crawl" type is handled; for any other value the file is
        # opened but nothing is imported.
        if args.type == "crawl":
            print(Style.BRIGHT + Fore.BLUE + "Importing crawlConfig")
            # Imported lazily so the models are only loaded when needed.
            from toddler.imports.nimbuscrawl import get_configuration
            from toddler.models import Host

            config_content = config_file.read()
            config = get_configuration(config_content)
            for host_name, crawl_config in config:
                # Reuse the stored host when there is one, otherwise start a
                # fresh document for this host name.
                host = Host.objects(host=host_name).first()
                if host is None:
                    host = Host(host=host_name)

                host.config["crawlConfig"] = crawl_config
                host.save()
                print(Fore.GREEN + "+ Added config for host {}".format(host_name))
Beispiel #2
0
    def setUp(self):
        """Build the ``Host`` fixture used by the tests and run ``setup``.

        Stores the crawl URL patterns on ``self.url_patterns`` and the
        constructed host on ``self.host``.
        """
        # Raw strings: ``\/`` and ``\.`` are not valid string escape
        # sequences, so non-raw literals trigger an invalid-escape warning.
        # The resulting string values are unchanged.
        self.url_patterns = [
            {
                "patterns": [
                    r"http:\/\/example.com\/.*\.html"
                ],
                "actions": ["follow", "index"]
            },
            {
                "patterns": [
                    r"http:\/\/example.com\/nocrawl\/.*\.html"
                ],
                "actions": ["nofollow"]
            }
        ]
        robots_txt = RobotsTxt(**{
            "status": "downloaded",
            "status_code": 200,
            "content": "User-Agent: *\nAllow: /\n",
            # expires ten days from now (timezone-aware, UTC)
            "expires": datetime.now(timezone.utc) + timedelta(days=10)
        })
        host = {
            "host": "example.com",
            "block": False,
            "block_date": "2015-02-02T14:23:12+00:00",
            "number_of_documents": 3434,
            "config": {"crawlConfig": self.url_patterns},
            "ignore_robots": False,
            "robots_txt": robots_txt
        }
        self.host = Host(**host)
        try:
            # we want to run it only once
            setup(['-m', 'mongodb://localhost'])
        except SystemError:
            # Deliberately ignored: setUp runs before every test, and
            # ``setup`` raises SystemError on repeated calls.
            pass
Beispiel #3
0
def main(*argv):
    """
    = Toddler tools list v0.1.0

    Print the names of the ``*.py`` modules located next to this file that
    expose a ``main`` attribute. Each candidate module is loaded in order to
    inspect it. Names are printed in sorted order.

    With ``--raw`` only the bare names are printed, one per line; otherwise
    a coloured header, usage hint and footer are printed around them.

    :param argv: command line arguments handed to ``setup``
    :return: None
    """

    parser = ArgumentParser(usage="--list",
                            description="Returns list of available tools")

    parser.add_argument("-l", "--list", default=True, action="store_true")
    parser.add_argument("-r", "--raw", default=False, action="store_true")
    args = setup(argv, argument_parser=parser, do_not_parse_config=True)

    if not args.list:
        return

    decorated = not args.raw

    if decorated:
        print()
        name = "Toddler tools lister."
        print(Style.BRIGHT+Fore.BLUE+name)
        print(Style.DIM+Fore.BLUE+"="*len(name))
        print()
        print(Fore.YELLOW+"Usage: toddler-tools "+Style.DIM+"TOOL_NAME")
        print()
        print("Available tools:")

    tools_dir = os.path.dirname(__file__)
    # glob() returns entries in arbitrary order; sort for a stable listing.
    for fname in sorted(glob(os.path.join(tools_dir, "*.py"))):
        # splitext removes only the trailing extension, whereas
        # str.replace(".py", "") would also strip ".py" inside the name.
        base_fname = os.path.splitext(os.path.basename(fname))[0]
        if base_fname == "__init__":
            continue

        loader = importlib.machinery.SourceFileLoader(
            "toddler.tools."+base_fname,
            fname
        )
        # NOTE(review): load_module() is deprecated in favour of
        # exec_module(); kept as-is because switching changes how the
        # module is registered in sys.modules.
        im = loader.load_module()
        if not hasattr(im, 'main'):
            continue

        if decorated:
            print(Fore.GREEN+"\t{}".format(base_fname))
        else:
            print(base_fname)

    if decorated:
        print("\nCheck " + Fore.RED + ""
              "--help" + Fore.RESET + " for each one of them for more"
                                      " information\n")
Beispiel #4
0
    def test_setup(self):
        """Exercise ``setup`` in three scenarios.

        1. A YAML file passed with ``-c``; a second call must raise
           ``SystemError``.
        2. A JSON file passed with ``-c``.
        3. A MongoDB URL passed with ``-m``, with ``toddler.models.connect``
           mocked out.
        """

        # --- 1) YAML config file -------------------------------------------
        # delete=False: the file is removed manually with os.unlink below.
        with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False)\
                as tmp_file:

            tmp_file.write("test: 123")
            # Closing flushes the content to disk; presumably needed so that
            # setup() can read the file through its name.
            tmp_file.close()
            argv = ['-c', '{}'.format(tmp_file.name)]
            # Clear the "already run" marker so setup() may be called here.
            decorators._reset_already_run(setup)
            setup(argv)

            # The value written to the file must be exposed on config.config.
            self.assertEqual(config.config.test, 123)
            os.unlink(tmp_file.name)

            # Without a reset in between, a second call must be rejected.
            self.assertRaises(SystemError, setup, argv)
            self.assertTrue(decorators.has_been_run(setup))

        # Replace the loaded configuration with a new Dict() before the next
        # scenario.
        config.config = Dict()

        decorators._reset_already_run(setup)

        # --- 2) JSON config file -------------------------------------------
        with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) \
                as tmp_file:

            tmp_file.write('{"test": 123}')
            tmp_file.close()
            argv = ['-c', tmp_file.name]

            setup(argv)

            self.assertEqual(config.config.test, 123)

            os.unlink(tmp_file.name)

        decorators._reset_already_run(setup)

        # --- 3) MongoDB URL ------------------------------------------------
        argv = ['-m', "mongodb://localhost"]

        # Patch connect so that no real database connection is attempted.
        with mock.patch("toddler.models.connect") as connect:

            def mock_connect(host=None):
                # The URL given with -m must arrive as the ``host`` argument.
                self.assertEqual(host, "mongodb://localhost")
            connect.side_effect = mock_connect
            setup(argv)

            self.assertTrue(connect.called)
Beispiel #5
0
    def setUp(self):
        """Reset the run-once state of ``setup`` and run it with the
        ``mongodb://localhost/aladdin`` URL before each test."""
        mongo_args = ['-m', 'mongodb://localhost/aladdin']

        # setup() has to be reset first so that it can run again for this test.
        _reset_already_run(setup)
        setup(mongo_args)
Beispiel #6
0
    @classmethod
    def setUpClass(cls):
        """Run ``setup`` once for the whole test class, then delegate to the
        parent class' ``setUpClass``.

        unittest calls ``setUpClass`` on the class itself, so it has to be a
        classmethod.
        """
        setup(['-m', 'mongodb://localhost'],
              do_not_parse_config=True)

        super(SchedulerTest, cls).setUpClass()