Beispiel #1
0
 def test_data_dir(self):
     """Check the default, overridden and reset values of ``Project.data_dir``.

     The test expects ``data_dir`` to be ``'.crawlmi'`` by default, to be
     joined onto the project path after ``set_data_dir('crawlmi_data')``, and
     to fall back to ``'.crawlmi'`` after ``set_data_dir(None)``. It also
     expects ``data_path('.', create_dir=True)`` to create the data directory.
     """
     project = Project(path=sample_project_dir)
     self.assertEqual(project.data_dir, '.crawlmi')

     project.set_data_dir('crawlmi_data')
     data_dir = join(sample_project_dir, 'crawlmi_data')
     self.assertEqual(project.data_dir, data_dir)
     self.assertFalse(exists(data_dir))

     try:
         project.data_path('.', create_dir=True)
         self.assertTrue(exists(data_dir))
     finally:
         # Always remove the directory this test created; otherwise a failed
         # assertion would leave it behind and break the "does not exist
         # yet" check on the next run.
         if exists(data_dir):
             os.rmdir(data_dir)

     project.set_data_dir(None)
     self.assertEqual(project.data_dir, '.crawlmi')
Beispiel #2
0
    def test_data_path(self):
        """Check how ``Project.data_path`` handles relative and absolute paths.

        A relative path is expected to be joined onto the data directory and
        to be created on disk only when ``create_dir=True``. An absolute path
        is expected to be returned unchanged.
        """
        project = Project(path=sample_project_dir)
        project.set_data_dir('crawlmi_data')

        parent_dir = join(sample_project_dir, 'crawlmi_data', 'a')
        nested_dir = join(parent_dir, 'b')
        try:
            # relative path
            relative = project.data_path(join('a', 'b'), create_dir=False)
            self.assertEqual(relative, nested_dir)
            self.assertFalse(exists(nested_dir))
            # create dir
            project.data_path(join('a', 'b'), create_dir=True)
            self.assertTrue(exists(nested_dir))
            # absolute path
            absolute_path = os.path.abspath(__file__)
            absolute = project.data_path(absolute_path, create_dir=False)
            self.assertEqual(absolute, absolute_path)
        finally:
            # Remove whatever was created, leaf first, even when an assertion
            # above failed; leftovers would make later runs fail on the
            # "does not exist yet" checks.
            for created in (nested_dir, parent_dir, project.data_dir):
                if exists(created):
                    os.rmdir(created)
Beispiel #3
0
def execute(argv=None):
    """Run a crawlmi command from a command-line argument list.

    The command name is taken from ``argv`` via ``pop_command_name``; the
    remaining arguments (everything after ``argv[0]``) are parsed with the
    options the command registers.

    Args:
        argv: Argument list, program name first. Defaults to ``sys.argv``.
            NOTE(review): ``pop_command_name`` presumably removes the command
            name from this list in place -- confirm before passing a list
            that must stay intact.

    Exits the process with status 0 after listing the available commands when
    no command name is given, and with status 2 when the command is unknown.
    """
    if argv is None:
        argv = sys.argv

    project = Project()
    settings = EngineSettings(module_settings=project.module_settings)
    inside_project = project.inside_project
    cmds = get_commands(settings, inside_project)
    cmd_name = pop_command_name(argv)
    if not cmd_name:
        print_commands(cmds, inside_project)
        sys.exit(0)
    elif cmd_name not in cmds:
        print_unknown_command(cmd_name, inside_project)
        sys.exit(2)

    # initialize the command
    cmd = cmds[cmd_name]()
    # initialize parser
    parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(),
                                   conflict_handler='resolve')
    parser.usage = 'crawlmi %s %s' % (cmd_name, cmd.syntax())
    parser.description = cmd.short_desc()
    cmd.add_options(parser)
    options, args = parser.parse_args(args=argv[1:])
    # initialize custom settings
    custom_settings = run_print_help(parser, cmd.get_settings, args, options)
    settings.custom_settings = custom_settings
    # initialize engine
    engine = Engine(settings, project, command_invoked=cmd_name)
    spider = run_print_help(parser, cmd.get_spider, engine, args, options)
    engine.set_spider(spider)
    # set project's data dir. It has to be when all the settings are known.
    project.set_data_dir(engine.settings.get('DATA_DIR'))
    engine.setup()
    cmd.set_engine(engine)
    # save pidfile
    pidfile = getattr(options, 'pidfile', None)
    if pidfile:
        # Text mode with '\n' yields the same bytes the old
        # binary-mode + os.linesep write produced, but also works on
        # Python 3, where writing a str to a 'wb' file raises TypeError.
        with open(pidfile, 'w') as f:
            f.write('%d\n' % os.getpid())
    # run command
    run_print_help(parser, cmd.run, args, options)