Exemple #1
0
 def test_bad_project(self):
     # A Project built from this test module's own directory must report
     # that it is not inside a project and fall back to empty defaults.
     here = os.path.dirname(__file__)
     outside = Project(path=here)
     self.assertFalse(outside.inside_project)
     # None of the project-location attributes may be populated.
     for attr_name in ('cfg_path', 'cfg', 'project_dir'):
         self.assertIsNone(getattr(outside, attr_name))
     loaded_values = outside.module_settings.values
     self.assertDictEqual(loaded_values, {})
     self.assertEqual(outside.data_dir, '.crawlmi')
Exemple #2
0
 def test_dummy_project(self):
     # Project(path=None) must behave as "no project": nothing is located,
     # the module settings are empty and the default data dir is used.
     dummy = Project(path=None)
     is_inside = dummy.inside_project
     self.assertFalse(is_inside)
     self.assertIsNone(dummy.cfg_path)
     self.assertIsNone(dummy.cfg)
     self.assertIsNone(dummy.project_dir)
     settings_values = dummy.module_settings.values
     self.assertDictEqual(settings_values, {})
     default_data_dir = '.crawlmi'
     self.assertEqual(dummy.data_dir, default_data_dir)
Exemple #3
0
 def test_data_dir(self):
     # Checks that set_data_dir() switches data_dir between the default
     # '.crawlmi' and a directory resolved against the sample project, and
     # that data_path(..., create_dir=True) actually creates that directory.
     project = Project(path=sample_project_dir)
     self.assertEqual(project.data_dir, '.crawlmi')
     project.set_data_dir('crawlmi_data')
     data_dir = join(sample_project_dir, 'crawlmi_data')
     self.assertEqual(project.data_dir, data_dir)
     self.assertFalse(exists(data_dir))
     try:
         project.data_path('.', create_dir=True)
         self.assertTrue(exists(data_dir))
     finally:
         # Always remove the directory created above; otherwise a failed
         # assertion would leave it behind and break the "does not exist"
         # precondition on the next run.
         if exists(data_dir):
             os.rmdir(data_dir)
     # passing None restores the default data dir
     project.set_data_dir(None)
     self.assertEqual(project.data_dir, '.crawlmi')
Exemple #4
0
def execute(argv=None):
    '''Parse the command line and run the requested crawlmi command.

    `argv` defaults to `sys.argv`. The command name is taken from `argv`
    via `pop_command_name`; everything after `argv[0]` is then parsed as
    the command's options and arguments.

    Exits with status 0 after listing the available commands when no
    command name is given, and with status 2 when the command is unknown.
    '''
    if argv is None:
        argv = sys.argv

    project = Project()
    settings = EngineSettings(module_settings=project.module_settings)
    inside_project = project.inside_project
    cmds = get_commands(settings, inside_project)
    cmd_name = pop_command_name(argv)
    if not cmd_name:
        print_commands(cmds, inside_project)
        sys.exit(0)
    elif cmd_name not in cmds:
        print_unknown_command(cmd_name, inside_project)
        sys.exit(2)

    # initialize the command
    cmd = cmds[cmd_name]()
    # initialize parser
    parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(),
                                   conflict_handler='resolve')
    parser.usage = 'crawlmi %s %s' % (cmd_name, cmd.syntax())
    parser.description = cmd.short_desc()
    cmd.add_options(parser)
    options, args = parser.parse_args(args=argv[1:])
    # initialize custom settings
    custom_settings = run_print_help(parser, cmd.get_settings, args, options)
    settings.custom_settings = custom_settings
    # initialize engine
    engine = Engine(settings, project, command_invoked=cmd_name)
    spider = run_print_help(parser, cmd.get_spider, engine, args, options)
    engine.set_spider(spider)
    # set project's data dir. It has to be when all the settings are known.
    project.set_data_dir(engine.settings.get('DATA_DIR'))
    engine.setup()
    cmd.set_engine(engine)
    # save pidfile
    if getattr(options, 'pidfile', None):
        # The file is opened in binary mode, so the content must be bytes:
        # writing a text string here raises TypeError on Python 3. Encoding
        # keeps the written bytes identical on Python 2.
        pid_line = (str(os.getpid()) + os.linesep).encode('ascii')
        with open(options.pidfile, 'wb') as f:
            f.write(pid_line)
    # run command
    run_print_help(parser, cmd.run, args, options)
Exemple #5
0
    def test_good_project(self):
        # Starting from the sample project's root or from a directory nested
        # inside it, the same project (and its settings) must be found.
        nested_dir = join(sample_project_dir, 'crawlmi_project')
        for start_dir in (sample_project_dir, nested_dir):
            found = Project(path=start_dir)
            self.assertTrue(found.inside_project)
            self.assertEqual(found.project_dir, sample_project_dir)
            test_value = found.module_settings.get_int('TEST')
            self.assertEqual(test_value, 42)
Exemple #6
0
def get_engine(custom_settings=None, **kwargs):
    '''Return the engine initialized with the custom settings.

    `custom_settings` (a mapping, or None) and any extra keyword arguments
    are merged into one settings dict; keyword arguments win on conflict.
    The merge is done on a copy, so the mapping passed by the caller is
    never modified.

    The returned engine uses a dummy project (`Project(path=None)`), a
    `BaseSpider` named 'dummy', and has `stop_if_idle` disabled.
    '''
    # Copy before merging: updating the caller's dict in place would leak
    # the keyword overrides into whatever else shares that dict.
    merged_settings = dict(custom_settings or {})
    merged_settings.update(kwargs)
    settings = EngineSettings(custom_settings=merged_settings)
    engine = Engine(settings, Project(path=None), clock=Clock())
    engine.set_spider(BaseSpider('dummy'))
    engine.stop_if_idle = False
    # it is common to use stats and signals in unittests, without full
    # initialization of the engine
    engine.stats = MemoryStats(engine)
    engine.signals = SignalManager(engine)
    return engine
Exemple #7
0
    def test_data_path(self):
        # Checks data_path(): relative paths are resolved against the
        # project's data dir, create_dir=True creates the directory, and
        # absolute paths are returned unchanged.
        project = Project(path=sample_project_dir)
        project.set_data_dir('crawlmi_data')
        nested_parent = join(sample_project_dir, 'crawlmi_data', 'a')
        expected = join(nested_parent, 'b')
        try:
            # relative path
            relative = project.data_path(join('a', 'b'), create_dir=False)
            self.assertEqual(relative, expected)
            self.assertFalse(exists(expected))
            # create dir
            project.data_path(join('a', 'b'), create_dir=True)
            self.assertTrue(exists(expected))
            # absolute path
            absolute_expected = os.path.abspath(__file__)
            absolute = project.data_path(absolute_expected, create_dir=False)
            self.assertEqual(absolute, absolute_expected)
        finally:
            # Remove everything this test may have created, innermost
            # first, even when an assertion above failed; leftovers would
            # break the "does not exist" checks on the next run.
            for created in (expected, nested_parent, project.data_dir):
                if exists(created):
                    os.rmdir(created)