Ejemplo n.º 1
0
 def process_options(self, args, opts):
     """Apply the crawl command's options to ``opts`` and ``self.settings``.

     After delegating to ``ScrapyCommand.process_options``, ``opts.spargs``
     is replaced by the result of ``arglist_to_dict``.  When ``opts.output``
     is set, the feed URI and feed format are stored in ``self.settings``
     at ``cmdline`` priority.

     Raises:
         UsageError: if ``arglist_to_dict`` raises ``ValueError``, or if the
             output format is not a key of the exporters mapping built from
             the ``FEED_EXPORTERS`` setting.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         spider_args = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE",
                          print_help=False)
     opts.spargs = spider_args

     if not opts.output:
         return

     # An output of "-" is mapped to the "stdout:" feed URI.
     feed_uri = 'stdout:' if opts.output == '-' else opts.output
     self.settings.set('FEED_URI', feed_uri, priority='cmdline')

     exporters = without_none_values(
         self.settings.getwithbase('FEED_EXPORTERS'))
     known_formats = exporters.keys()

     # Without an explicit format, fall back to the output file extension.
     if not opts.output_format:
         extension = os.path.splitext(opts.output)[1]
         opts.output_format = extension.replace(".", "")

     if opts.output_format not in known_formats:
         raise UsageError(
             "Unrecognized output format '%s', set one"
             " using the '-t' switch or as a file extension"
             " from the supported list %s" %
             (opts.output_format, tuple(known_formats)))

     self.settings.set('FEED_FORMAT', opts.output_format,
                       priority='cmdline')
Ejemplo n.º 2
0
 def process_options(self, args, opts):
     """Delegate to ``ScrapyCommand.process_options`` and parse ``-a`` values.

     ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.

     Raises:
         UsageError: if ``arglist_to_dict`` raises ``ValueError``.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         parsed_spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError(
             "Invalid -a value, use -a NAME=VALUE", print_help=False)
     else:
         opts.spargs = parsed_spargs
Ejemplo n.º 3
0
    def process_spider_arguments(self, opts):
        """Replace ``opts.spargs`` with the result of ``arglist_to_dict``.

        Raises:
            UsageError: if ``arglist_to_dict`` raises ``ValueError``.
        """
        try:
            spider_arguments = arglist_to_dict(opts.spargs)
        except ValueError:
            raise UsageError("Invalid -a value, use -a NAME=VALUE",
                             print_help=False)
        opts.spargs = spider_arguments
Ejemplo n.º 4
0
 def process_options(self, args, opts):
     """Delegate to ``ScrapyCommand.process_options`` and parse ``-a`` values.

     ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.

     Raises:
         UsageError: if ``arglist_to_dict`` raises ``ValueError``.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         # The keyword must be spelled ``print_help``; any other keyword is
         # not recognised by the exception constructor.
         raise UsageError('Invalid -a value, use -a Name=VALUE',
                          print_help=False)
Ejemplo n.º 5
0
 def process_options(self, args, opts):
     """Apply the crawl command's options to ``opts`` and ``self.settings``.

     ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.
     ``opts.overwrite_output`` is an alternative to ``opts.output`` that
     additionally sets ``FEED_OVERWRITE``; the two are mutually exclusive.
     When an output is given, the feed URI and format are stored in
     ``self.settings`` at ``cmdline`` priority.

     Raises:
         UsageError: if ``arglist_to_dict`` raises ``ValueError``, if both
             output options are given, or if the output format is not a key
             of ``FEED_EXPORTERS`` or ``FEED_EXPORTERS_BASE``.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
     if opts.overwrite_output:
         if opts.output:
             raise UsageError("Please use only one of --output and --overwrite-output")
         opts.output = opts.overwrite_output
         # Set through the same API and priority as every other setting
         # written by this method.
         self.settings.set('FEED_OVERWRITE', True, priority='cmdline')
     if opts.output:
         # An output of "-" is mapped to the "stdout:" feed URI.
         if opts.output == '-':
             self.settings.set('FEED_URI', 'stdout:', priority='cmdline')
         else:
             self.settings.set('FEED_URI', opts.output, priority='cmdline')
         valid_output_formats = (
             list(self.settings.getdict('FEED_EXPORTERS').keys()) +
             list(self.settings.getdict('FEED_EXPORTERS_BASE').keys())
         )
         # Without an explicit format, fall back to the file extension.
         if not opts.output_format:
             opts.output_format = os.path.splitext(opts.output)[1].replace(".", "")
         if opts.output_format not in valid_output_formats:
             raise UsageError("Unrecognized output format '%s', set one"
                              " using the '-t' switch or as a file extension"
                              " from the supported list %s" % (opts.output_format,
                                                               tuple(valid_output_formats)))
         self.settings.set('FEED_FORMAT', opts.output_format, priority='cmdline')
Ejemplo n.º 6
0
 def process_options(self, args, opts):
     """Delegate to ``ScrapyCommand.process_options`` and parse ``-a`` values.

     ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.

     Raises:
         RuntimeError: if ``arglist_to_dict`` raises ``ValueError``.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         # Built-in exceptions reject keyword arguments, so only the
         # message is passed.
         raise RuntimeError("Invalid -a value, use -a NAME=VALUE")
Ejemplo n.º 7
0
    def process_options(self, args, opts):
        """Copy command-line options into ``self.settings``.

        The ``-s`` values in ``opts.set`` are applied first, followed by the
        logging options.  If ``opts.pidfile`` is set, the current process id
        is written to that file, and if ``opts.pdb`` is set
        ``failure.startDebugMode()`` is called.

        Raises:
            UsageError: if parsing or applying ``opts.set`` raises
                ``ValueError``.
        """
        def set_from_cmdline(name, value):
            # Everything written here uses the 'cmdline' priority.
            self.settings.set(name, value, priority='cmdline')

        try:
            cli_settings = arglist_to_dict(opts.set)
            self.settings.setdict(cli_settings, priority='cmdline')
        except ValueError:
            raise UsageError("Invalid -s value, use -s NAME=VALUE",
                             print_help=False)

        # Asking for a log file or a log level switches logging on ...
        if opts.logfile:
            set_from_cmdline('LOG_ENABLED', True)
            set_from_cmdline('LOG_FILE', opts.logfile)

        if opts.loglevel:
            set_from_cmdline('LOG_ENABLED', True)
            set_from_cmdline('LOG_LEVEL', opts.loglevel)

        # ... but nolog is applied last and therefore wins.
        if opts.nolog:
            set_from_cmdline('LOG_ENABLED', False)

        if opts.pidfile:
            with open(opts.pidfile, "w") as pid_file:
                pid_line = str(os.getpid()) + os.linesep
                pid_file.write(pid_line)

        if opts.pdb:
            failure.startDebugMode()
Ejemplo n.º 8
0
    def run(self, args, opts):
        """Configure settings from options and config, then run the 'tieba' crawl.

        Positional ``args`` are optional: ``args[0]`` replaces the default
        tieba name from the config and ``args[1]`` gives the database name,
        which is also stored back into the config for that tieba.  The
        config is saved after the crawl has finished.

        Raises:
            UsageError: on an invalid ``-a`` value, a filter name that
                cannot be resolved, three or more positional arguments, or
                a missing database name.
        """
        self.set_pages(opts.pages)
        try:
            opts.spargs = arglist_to_dict(opts.spargs)
        except ValueError:
            raise UsageError("Invalid -a value, use -a NAME=VALUE",
                             print_help=False)
        self.settings.set('GOOD_ONLY', opts.good_only)
        self.settings.set('SEE_LZ', opts.see_lz)
        if opts.filter:
            # SECURITY NOTE(review): eval() executes whatever expression is
            # given on the command line.  Kept for compatibility; consider
            # an attribute lookup on ``filter`` instead.
            try:
                opts.filter = eval('filter.' + opts.filter)
            except Exception:
                # Narrowed from a bare ``except`` so that KeyboardInterrupt
                # and SystemExit are not reported as a bad filter name.
                raise UsageError("Invalid filter function name!")
        self.settings.set("FILTER", opts.filter)
        cfg = config.config()
        if len(args) >= 3:
            raise UsageError("Too many arguments!")

        # Byte-string arguments are decoded in place as UTF-8.
        for i, arg in enumerate(args):
            if isinstance(arg, bytes):
                args[i] = arg.decode("utf8")

        if 'MYSQL_PORT' not in cfg.config.keys():
            cfg.config['MYSQL_PORT'] = 3306

        self.settings.set('MYSQL_HOST', cfg.config['MYSQL_HOST'])
        self.settings.set('MYSQL_USER', cfg.config['MYSQL_USER'])
        self.settings.set('MYSQL_PASSWD', cfg.config['MYSQL_PASSWD'])
        self.settings.set('MYSQL_PORT', cfg.config['MYSQL_PORT'])

        tbname = cfg.config['DEFAULT_TIEBA']
        if len(args) >= 1:
            tbname = args[0]

        # The database name comes from the config entry for this tieba
        # unless a second positional argument overrides it.
        dbname = None
        if tbname in cfg.config['MYSQL_DBNAME'].keys():
            dbname = cfg.config['MYSQL_DBNAME'][tbname]
        if len(args) >= 2:
            dbname = args[1]
            cfg.config['MYSQL_DBNAME'][tbname] = dbname
        if not dbname:
            raise UsageError("Please input database name!")

        self.settings.set('TIEBA_NAME', tbname, priority='cmdline')
        self.settings.set('MYSQL_DBNAME', dbname, priority='cmdline')

        config.init_database(cfg.config['MYSQL_HOST'],
                             cfg.config['MYSQL_USER'],
                             cfg.config['MYSQL_PASSWD'],
                             cfg.config['MYSQL_PORT'], dbname)

        log = config.log(tbname, dbname, self.settings['BEGIN_PAGE'],
                         opts.good_only, opts.see_lz)
        self.settings.set('SIMPLE_LOG', log)
        self.crawler_process.crawl('tieba', **opts.spargs)
        self.crawler_process.start()

        cfg.save()
Ejemplo n.º 9
0
 def test_arglist_to_dict(self):
     """``arglist_to_dict`` turns ``NAME=VALUE`` strings into a dict."""
     arguments = ["arg1=val1", "arg2=val2"]
     expected = {"arg1": "val1", "arg2": "val2"}
     self.assertEqual(arglist_to_dict(arguments), expected)
Ejemplo n.º 10
0
 def process_options(self, args, opts):
     """Parse the ``-a`` values and apply the ``nofollow`` option.

     ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.  When
     ``opts.nofollow`` is set, ``CRAWLSPIDER_FOLLOW_LINKS`` is overridden
     to ``False``.

     Raises:
         UsageError: if ``arglist_to_dict`` raises ``ValueError``.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         spider_kwargs = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
     opts.spargs = spider_kwargs

     if not opts.nofollow:
         return
     # NOTE(review): this is ``settings``, not ``self.settings`` -- presumably
     # a module-level settings object; confirm against the file's imports.
     settings.overrides['CRAWLSPIDER_FOLLOW_LINKS'] = False
Ejemplo n.º 11
0
 def process_options(self, args, opts):
     """Parse the ``-a`` values and configure the ``FEEDS`` setting.

     ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.  When
     ``opts.output`` is set, the value returned by
     ``feed_process_params_from_cli`` is stored as ``FEEDS`` at ``cmdline``
     priority.

     Raises:
         UsageError: if ``arglist_to_dict`` raises ``ValueError``.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         spider_args = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
     opts.spargs = spider_args

     if not opts.output:
         return
     self.settings.set(
         'FEEDS',
         feed_process_params_from_cli(self.settings, opts.output, opts.output_format),
         priority='cmdline',
     )
Ejemplo n.º 12
0
 def process_options(self, args, opts):
     """Handle the ``-a`` values and the ``nofollow`` switch.

     Raises:
         UsageError: if ``arglist_to_dict`` raises ``ValueError`` for
             ``opts.spargs``.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         parsed = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE",
                          print_help=False)
     else:
         opts.spargs = parsed

     follow_disabled = opts.nofollow
     if follow_disabled:
         # NOTE(review): ``settings`` here is a free name, not
         # ``self.settings`` -- presumably imported at module level; verify.
         settings.overrides['CRAWLSPIDER_FOLLOW_LINKS'] = False
Ejemplo n.º 13
0
 def process_options(self, args, opts):
     """Parse the ``-a`` values and apply the ``urlqueue`` option.

     ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.  When
     ``opts.urlqueue`` is set, it is stored as the ``Redis_key`` setting at
     ``cmdline`` priority.

     Raises:
         UsageError: if ``arglist_to_dict`` raises ``ValueError``.
     """
     ScrapyCommand.process_options(self, args, opts)
     # Only the parsing step is guarded, so a ValueError raised while
     # writing the setting is not misreported as a bad -a value.
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE",
                          print_help=False)
     if opts.urlqueue:
         self.settings.set('Redis_key',
                           opts.urlqueue,
                           priority='cmdline')
Ejemplo n.º 14
0
 def process_options(self, args, opts):
     """Parse the ``-a`` values and override the feed URI and format.

     ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.  When
     ``opts.output`` is set, ``FEED_URI`` and ``FEED_FORMAT`` are written to
     ``self.settings.overrides``.

     Raises:
         UsageError: if ``arglist_to_dict`` raises ``ValueError``.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         spider_args = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
     opts.spargs = spider_args

     if opts.output:
         # An output of "-" is mapped to the "stdout:" feed URI.
         self.settings.overrides['FEED_URI'] = (
             'stdout:' if opts.output == '-' else opts.output)
         self.settings.overrides['FEED_FORMAT'] = opts.output_format
Ejemplo n.º 15
0
 def process_options(self, args, opts):
     """Handle the ``-a`` values and the feed output options.

     Raises:
         UsageError: if ``arglist_to_dict`` raises ``ValueError`` for
             ``opts.spargs``.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         parsed_spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE",
                          print_help=False)
     else:
         opts.spargs = parsed_spargs

     # Nothing else to configure unless an output target was requested.
     if not opts.output:
         return

     writes_to_stdout = opts.output == '-'
     if writes_to_stdout:
         self.settings.overrides['FEED_URI'] = 'stdout:'
     else:
         self.settings.overrides['FEED_URI'] = opts.output
     self.settings.overrides['FEED_FORMAT'] = opts.output_format
Ejemplo n.º 16
0
 def process_options(self, args, opts):
     """Parse the ``-a`` values and override the feed URI and format.

     ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.  When
     ``opts.output`` is set, ``FEED_URI`` and ``FEED_FORMAT`` are written to
     ``self.settings.overrides`` after the format has been validated.

     Raises:
         UsageError: if ``arglist_to_dict`` raises ``ValueError``, or if
             ``opts.output_format`` is not a key of ``FEED_EXPORTERS`` or
             ``FEED_EXPORTERS_BASE``.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
     if opts.output:
         # An output of "-" is mapped to the "stdout:" feed URI.
         if opts.output == '-':
             self.settings.overrides['FEED_URI'] = 'stdout:'
         else:
             self.settings.overrides['FEED_URI'] = opts.output
         # keys() is wrapped in list() because Python 3 key views do not
         # support "+"; on Python 2 the result is unchanged.
         valid_output_formats = (
             list(self.settings['FEED_EXPORTERS'].keys())
             + list(self.settings['FEED_EXPORTERS_BASE'].keys())
         )
         if opts.output_format not in valid_output_formats:
             raise UsageError('Invalid/unrecognized output format: %s, Expected %s' % (opts.output_format,valid_output_formats))
         self.settings.overrides['FEED_FORMAT'] = opts.output_format
Ejemplo n.º 17
0
Archivo: crawl.py Proyecto: 1012/scrapy
 def process_options(self, args, opts):
     """Handle the ``-a`` values and the validated feed output options.

     When ``opts.output`` is set, the feed URI is overridden, the requested
     format is checked against the configured exporters, and the format is
     overridden as well.

     Raises:
         UsageError: if ``arglist_to_dict`` raises ``ValueError`` for
             ``opts.spargs``, or if ``opts.output_format`` is not a key of
             ``FEED_EXPORTERS`` or ``FEED_EXPORTERS_BASE``.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
     if opts.output:
         if opts.output == '-':
             self.settings.overrides['FEED_URI'] = 'stdout:'
         else:
             self.settings.overrides['FEED_URI'] = opts.output
         # Build real lists before concatenating: Python 3 key views cannot
         # be added together, and Python 2 gives the same list either way.
         custom_formats = list(self.settings['FEED_EXPORTERS'].keys())
         base_formats = list(self.settings['FEED_EXPORTERS_BASE'].keys())
         valid_output_formats = custom_formats + base_formats
         if opts.output_format not in valid_output_formats:
             raise UsageError('Invalid/unrecognized output format: %s, Expected %s' % (opts.output_format, valid_output_formats))
         self.settings.overrides['FEED_FORMAT'] = opts.output_format
Ejemplo n.º 18
0
    def process_options(self, args, opts):
        """Copy command-line options into ``self.settings.overrides``.

        The ``-s`` values in ``opts.set`` are applied first, followed by the
        logging options.  If ``opts.pidfile`` is set, the current process id
        is written to that file.

        Raises:
            UsageError: if parsing or applying ``opts.set`` raises
                ``ValueError``.
        """
        try:
            cli_overrides = arglist_to_dict(opts.set)
            self.settings.overrides.update(cli_overrides)
        except ValueError:
            raise UsageError("Invalid -s value, use -s NAME=VALUE", print_help=False)

        # A log file or a log level implies that logging is enabled.
        log_options = (
            (opts.logfile, 'LOG_FILE'),
            (opts.loglevel, 'LOG_LEVEL'),
        )
        for option_value, setting_name in log_options:
            if option_value:
                self.settings.overrides['LOG_ENABLED'] = True
                self.settings.overrides[setting_name] = option_value

        # nolog is applied last, so it wins over the two options above.
        if opts.nolog:
            self.settings.overrides['LOG_ENABLED'] = False

        if opts.pidfile:
            with open(opts.pidfile, "w") as pid_file:
                pid_line = str(os.getpid()) + os.linesep
                pid_file.write(pid_line)
Ejemplo n.º 19
0
 def process_options(self, args, opts):
     """Parse the ``-a`` values and override the feed URI and format.

     ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.  When
     ``opts.output`` is set, ``FEED_URI`` and ``FEED_FORMAT`` are written to
     ``self.settings.overrides``; a missing format is derived from the
     output file extension.

     Raises:
         UsageError: if ``arglist_to_dict`` raises ``ValueError``, or if the
             output format is not a key of ``FEED_EXPORTERS`` or
             ``FEED_EXPORTERS_BASE``.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
     if opts.output:
         # An output of "-" is mapped to the "stdout:" feed URI.
         if opts.output == '-':
             self.settings.overrides['FEED_URI'] = 'stdout:'
         else:
             self.settings.overrides['FEED_URI'] = opts.output
         # keys() is wrapped in list() because Python 3 key views do not
         # support "+"; on Python 2 the result is unchanged.
         valid_output_formats = (
             list(self.settings['FEED_EXPORTERS'].keys())
             + list(self.settings['FEED_EXPORTERS_BASE'].keys())
         )
         # Without an explicit format, fall back to the file extension.
         if not opts.output_format:
             opts.output_format = os.path.splitext(opts.output)[1].replace(".", "")
         if opts.output_format not in valid_output_formats:
             raise UsageError("Unrecognized output format '%s', set one"
                              " using the '-t' switch or as a file extension"
                              " from the supported list %s" % (opts.output_format,
                                                               tuple(valid_output_formats)))
         self.settings.overrides['FEED_FORMAT'] = opts.output_format
Ejemplo n.º 20
0
 def process_options(self, args, opts):
     """Handle the ``-a`` values and the feed output options.

     When ``opts.output`` is set, ``FEED_URI`` and ``FEED_FORMAT`` are stored
     in ``self.settings`` at ``cmdline`` priority; a missing format is taken
     from the output file extension.

     Raises:
         UsageError: if ``arglist_to_dict`` raises ``ValueError``, or if the
             output format is not a key of the exporters mapping built from
             ``FEED_EXPORTERS``.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         parsed_spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
     opts.spargs = parsed_spargs

     if not opts.output:
         return

     target_uri = 'stdout:' if opts.output == '-' else opts.output
     self.settings.set('FEED_URI', target_uri, priority='cmdline')

     composite = self.settings._getcomposite('FEED_EXPORTERS')
     supported = without_none_values(composite).keys()

     if not opts.output_format:
         _, dotted_ext = os.path.splitext(opts.output)
         opts.output_format = dotted_ext.replace(".", "")

     if opts.output_format not in supported:
         message = ("Unrecognized output format '%s', set one"
                    " using the '-t' switch or as a file extension"
                    " from the supported list %s")
         raise UsageError(message % (opts.output_format, tuple(supported)))

     self.settings.set('FEED_FORMAT', opts.output_format, priority='cmdline')
Ejemplo n.º 21
0
 def process_options(self, args, opts):
     """Apply the ``-a`` values and the feed output options.

     ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.  When
     ``opts.output`` is set, ``FEED_URI`` and ``FEED_FORMAT`` are stored in
     ``self.settings`` at ``cmdline`` priority.

     Raises:
         UsageError: if ``arglist_to_dict`` raises ``ValueError``, or if the
             output format is not a key of ``FEED_EXPORTERS`` or
             ``FEED_EXPORTERS_BASE``.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         spider_args = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
     else:
         opts.spargs = spider_args

     if not opts.output:
         return

     # An output of "-" is mapped to the "stdout:" feed URI.
     feed_uri = "stdout:" if opts.output == "-" else opts.output
     self.settings.set("FEED_URI", feed_uri, priority="cmdline")

     custom_formats = list(self.settings.getdict("FEED_EXPORTERS").keys())
     base_formats = list(self.settings.getdict("FEED_EXPORTERS_BASE").keys())
     accepted_formats = custom_formats + base_formats

     # Without an explicit format, fall back to the file extension.
     if not opts.output_format:
         extension = os.path.splitext(opts.output)[1]
         opts.output_format = extension.replace(".", "")

     if opts.output_format not in accepted_formats:
         details = (opts.output_format, tuple(accepted_formats))
         raise UsageError(
             "Unrecognized output format '%s', set one"
             " using the '-t' switch or as a file extension"
             " from the supported list %s" % details
         )

     self.settings.set("FEED_FORMAT", opts.output_format, priority="cmdline")
Ejemplo n.º 22
0
    def process_options(self, args, opts):
        """Transfer the global command-line options into ``self.settings``.

        Applies the ``-s`` values from ``opts.set``, then the logging
        options, writes the process id to ``opts.pidfile`` when given, and
        calls ``failure.startDebugMode()`` when ``opts.pdb`` is set.  All
        settings are written at ``cmdline`` priority.

        Raises:
            UsageError: if parsing or applying ``opts.set`` raises
                ``ValueError``.
        """
        try:
            overrides = arglist_to_dict(opts.set)
            self.settings.setdict(overrides, priority='cmdline')
        except ValueError:
            raise UsageError("Invalid -s value, use -s NAME=VALUE", print_help=False)

        # A log file or a log level implies that logging is enabled.
        for option_value, setting_name in ((opts.logfile, 'LOG_FILE'),
                                           (opts.loglevel, 'LOG_LEVEL')):
            if option_value:
                self.settings.set('LOG_ENABLED', True, priority='cmdline')
                self.settings.set(setting_name, option_value, priority='cmdline')

        # nolog is applied last, so it wins over the two options above.
        if opts.nolog:
            self.settings.set('LOG_ENABLED', False, priority='cmdline')

        if opts.pidfile:
            with open(opts.pidfile, "w") as pid_file:
                pid_file.write(str(os.getpid()) + os.linesep)

        if opts.pdb:
            failure.startDebugMode()
Ejemplo n.º 23
0
 def process_options(self, args, opts):
     """Delegate to ``ScrapyCommand.process_options`` and parse ``-a`` values.

     ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.  If
     that call raises ``ValueError``, a message is printed and
     ``opts.spargs`` is left unchanged.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         # Call form of print: valid on Python 3 and prints the same text
         # on Python 2.
         print("Invalid -a value, use -a NAME=VALUE")
Ejemplo n.º 24
0
 def test_arglist_to_dict(self):
     """Check the dict built by ``arglist_to_dict`` from two pairs."""
     pairs = ['arg1=val1', 'arg2=val2']
     result = arglist_to_dict(pairs)
     self.assertEqual(result, {'arg1': 'val1', 'arg2': 'val2'})
Ejemplo n.º 25
0
 def test_arglist_to_dict(self):
     """Two ``NAME=VALUE`` strings must map to the matching dict."""
     expected = dict(arg1='val1', arg2='val2')
     self.assertEqual(arglist_to_dict(['arg1=val1', 'arg2=val2']), expected)
Ejemplo n.º 26
0
 def process_options(self, args, opts):
     """Delegate to ``ScrapyCommand.process_options`` and parse ``-a`` values.

     On success ``opts.spargs`` is replaced by the result of
     ``arglist_to_dict``.  On ``ValueError`` a message is printed and
     ``opts.spargs`` keeps its original value.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         parsed = arglist_to_dict(opts.spargs)
     except ValueError:
         print("Invalid -a value, use -a NAME=VALUE")
     else:
         opts.spargs = parsed
Ejemplo n.º 27
0
    def process_spider_arguments(self, opts):
        """Parse the ``-a`` values stored in ``opts.spargs``.

        The attribute is overwritten with the result of ``arglist_to_dict``.

        Raises:
            UsageError: if ``arglist_to_dict`` raises ``ValueError``.
        """
        try:
            parsed_arguments = arglist_to_dict(opts.spargs)
        except ValueError:
            raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
        else:
            opts.spargs = parsed_arguments
Ejemplo n.º 28
0
 def process_options(self, args, opts):
     """Delegate to ``ScrapyCommand.process_options`` and parse ``-a`` values.

     ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.  If
     that call raises ``ValueError``, a banner is printed and
     ``opts.spargs`` is left unchanged.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         # Call form of print: valid on Python 3 and prints the same text
         # on Python 2.
         print("*************** Multitask exception *******************")
Ejemplo n.º 29
0
    def process_options(self, args, opts):
        """Delegate to ``ScrapyCommand.process_options`` and parse ``-a`` values.

        ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.
        Any exception from that call propagates to the caller.
        """
        ScrapyCommand.process_options(self, args, opts)
        spider_args = arglist_to_dict(opts.spargs)
        opts.spargs = spider_args
Ejemplo n.º 30
0
 def process_options(self, args, opts):
     """Delegate to ``ScrapyCommand.process_options`` and parse ``-a`` values.

     On success ``opts.spargs`` is replaced by the result of
     ``arglist_to_dict``.  A ``ValueError`` from that call is ignored and
     ``opts.spargs`` keeps its original value.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         parsed = arglist_to_dict(opts.spargs)
     except ValueError:
         # NOTE(review): the error is swallowed without any report --
         # confirm this best-effort behaviour is intended.
         return
     opts.spargs = parsed
 def process_options(self, args, opts):
     """Delegate to ``ScrapyCommand.process_options`` and parse ``-a`` values.

     ``opts.spargs`` is replaced by the result of ``arglist_to_dict``; an
     exception raised by that call is not handled here.
     """
     ScrapyCommand.process_options(self, args, opts)
     parsed_spargs = arglist_to_dict(opts.spargs)
     opts.spargs = parsed_spargs
Ejemplo n.º 32
0
 def process_options(self, args, opts):
     """Delegate to ``ScrapyCommand.process_options`` and parse ``-a`` values.

     ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.

     Raises:
         Exception: if ``arglist_to_dict`` fails for any reason; the message
             includes the text of the underlying error.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except Exception as exc:
         # Same exception type as before, but with a message so the cause
         # of the failure is not lost.
         raise Exception("Invalid -a value: %s" % (exc,))