Example #1
0
 def process_options(self, args, opts):
     """Run the base option processing, then replace ``opts.spargs`` with a dict.

     Raises:
         UsageError: when ``arglist_to_dict`` raises ``ValueError`` for the
             collected ``-a`` values.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         spider_args = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
     opts.spargs = spider_args
Example #2
0
 def add_options(self, parser):
     """Register the ``--get*`` options after the base command options."""
     ScrapyCommand.add_options(self, parser)
     # Each entry is (option name, help text); the option name doubles as dest.
     getters = (
         ("get", "print raw setting value"),
         ("getbool", "print setting value, interpreted as a boolean"),
         ("getint", "print setting value, interpreted as an integer"),
         ("getfloat", "print setting value, interpreted as a float"),
         ("getlist", "print setting value, interpreted as a list"),
     )
     for name, description in getters:
         parser.add_option("--" + name, dest=name, metavar="SETTING", help=description)
Example #3
0
 def add_options(self, parser):
     """Add ``--dry-run`` and ``--sample`` on top of the base command options."""
     ScrapyCommand.add_options(self, parser)
     register = parser.add_option
     register('--dry-run', action='store_true', help='Runs the spiders without writing any files')
     register('--sample', type=int, help='The number of files to write')
Example #4
0
 def add_options(self, parser):
     """Set the usage line, then register the base options and ``-o/--output``."""
     # The usage string is assigned before the base class receives the parser.
     parser.usage = "usage: scrapy spiderdocs [<module.name>] [-o <filename.md>]"
     ScrapyCommand.add_options(self, parser)
     output_settings = {
         "dest": "output_filename",
         "metavar": "FILE",
         "help": "Output file name.",
     }
     parser.add_option("-o", "--output", **output_settings)
Example #5
0
 def add_options(self, parser):
     """Register the chart options: output prefix, step, smoothing, top-N, no-show."""
     ScrapyCommand.add_options(self, parser)
     # (flags, keyword arguments) pairs, registered in this order.
     chart_options = (
         (('-o', '--output'), {'help': 'prefix for charts (without ".html")'}),
         (('--step',), {'type': float, 'default': 30, 'help': 'time step, s'}),
         (('--smooth',), {'type': int, 'default': 50, 'help': 'smooth span'}),
         (('--top',), {'type': int, 'default': 30, 'help': 'top domains to show'}),
         (('--no-show',), {'action': 'store_true', 'help': "don't show charts"}),
     )
     for flags, settings in chart_options:
         parser.add_option(*flags, **settings)
Example #6
0
 def add_options(self, parser):
     """Register ``-a``, ``-o/--output`` and ``-t/--output-format``."""
     ScrapyCommand.add_options(self, parser)
     add = parser.add_option
     add(
         "-a",
         dest="spargs",
         action="append",
         default=[],
         metavar="NAME=VALUE",
         help="set spider argument (may be repeated)",
     )
     add("-o", "--output", metavar="FILE", help="dump scraped items into FILE (use - for stdout)")
     add("-t", "--output-format", metavar="FORMAT", help="format to use for dumping items with -o")
Example #7
0
 def add_options(self, parser):
     """Register ``--spider``, ``--headers`` and ``--no-redirect``."""
     ScrapyCommand.add_options(self, parser)
     # (flag, keyword arguments) pairs, registered in this order.
     fetch_options = (
         ("--spider", dict(dest="spider", help="use this spider")),
         ("--headers", dict(dest="headers", action="store_true",
                            help="print response HTTP headers instead of body")),
         ("--no-redirect", dict(dest="no_redirect", action="store_true", default=False,
                                help="do not handle HTTP 3xx status codes and print response as-is")),
     )
     for flag, keywords in fetch_options:
         parser.add_option(flag, **keywords)
Example #8
0
 def add_options(self, parser):
     """Register ``-c``, ``--spider`` and ``--no-redirect``."""
     ScrapyCommand.add_options(self, parser)
     add = parser.add_option
     add("-c", dest="code", help="evaluate the code in the shell, print the result and exit")
     add("--spider", dest="spider", help="use this spider")
     add(
         "--no-redirect",
         dest="no_redirect",
         action="store_true",
         default=False,
         help="do not handle HTTP 3xx status codes and print response as-is",
     )
Example #9
0
 def setUp(self):
     """Create a ``ScrapyCommand`` with fresh ``Settings`` and a parser holding its options."""
     command = ScrapyCommand()
     command.settings = Settings()
     self.command = command
     help_formatter = optparse.TitledHelpFormatter()
     self.parser = optparse.OptionParser(formatter=help_formatter, conflict_handler='resolve')
     self.command.add_options(self.parser)
 def add_options(self, parser):
     """Register ``--postgres_uri`` (default ``postgresql:///strava``)."""
     ScrapyCommand.add_options(self, parser)
     postgres_option = dict(
         dest="postgres_uri",
         metavar="URI",
         help="connection string for PostgreSQL to put Strava data into",
         default="postgresql:///strava",
     )
     parser.add_option("--postgres_uri", **postgres_option)
Example #11
0
 def add_options(self, parser):
     """Register the ``--all`` flag after the base command options."""
     ScrapyCommand.add_options(self, parser)
     all_flag = {"dest": "all", "action": "store_true", "help": "Run validation on all scrapers"}
     parser.add_option("--all", **all_flag)
Example #12
0
 def add_options(self, parser):
     """Register the spider-argument and output options after the base ones."""
     ScrapyCommand.add_options(self, parser)
     # (flags, keyword arguments) pairs, registered in this order.
     crawl_options = (
         (("-a",), dict(dest="spargs", action="append", default=[], metavar="NAME=VALUE",
                        help="set spider argument (may be repeated)")),
         (("-o", "--output"), dict(metavar="FILE",
                                   help="dump scraped items into FILE (use - for stdout)")),
         (("-t", "--output-format"), dict(metavar="FORMAT",
                                          help="format to use for dumping items with -o")),
     )
     for flags, keywords in crawl_options:
         parser.add_option(*flags, **keywords)
Example #13
0
 def add_options(self, parser):
     """Register the shell options: ``-c``, ``--spider`` and ``--no-redirect``."""
     ScrapyCommand.add_options(self, parser)
     # (flag, keyword arguments) pairs, registered in this order.
     shell_options = (
         ("-c", {"dest": "code",
                 "help": "evaluate the code in the shell, print the result and exit"}),
         ("--spider", {"dest": "spider", "help": "use this spider"}),
         ("--no-redirect", {"dest": "no_redirect", "action": "store_true", "default": False,
                            "help": "do not handle HTTP 3xx status codes and print response as-is"}),
     )
     for flag, keywords in shell_options:
         parser.add_option(flag, **keywords)
Example #14
0
 def process_options(self, args, opts):
     """Fill ``self._locations`` from the arguments or from the settings.

     With a positional argument, its first value is mapped to
     ``opts.output_filename``. Otherwise the ``SPIDERDOCS_LOCATIONS`` setting
     replaces ``self._locations``.

     Raises:
         UsageError: when there is no positional argument and the setting is
             missing or falsy.
     """
     ScrapyCommand.process_options(self, args, opts)
     if args:
         self._locations[args[0]] = opts.output_filename
         return
     configured = self.settings.get('SPIDERDOCS_LOCATIONS', None)
     if not configured:
         raise UsageError("Module name is required.", print_help=False)
     self._locations = configured
Example #15
0
    def process_options(self, args, opts):
        """Run the base option processing, then hand ``args``/``opts`` to ``parse_from_cmdline``."""
        # Process the option arguments passed in from the command line.
        ScrapyCommand.process_options(self, args, opts)
        # Disabled debugging / log-directory creation code:
        # print(self.settings.__dict__)
        # if not os.path.exists(os.path.dirname(self.settings.attributes.get('LOG_FILE').value)):
        #     os.makedirs(os.path.dirname(self.settings.attributes.get('LOG_FILE').value))

        # Loading the default configuration is currently disabled.
        # self.parse_default_args()
        self.parse_from_cmdline(args,opts)
Example #16
0
 def add_options(self, parser):
     """Register the ``--verbose`` / ``-v`` flag after the base command options."""
     ScrapyCommand.add_options(self, parser)
     verbose_help = "also display twisted/python/platform info (useful for bug reports)"
     parser.add_option("--verbose", "-v", dest="verbose", action="store_true", help=verbose_help)
Example #17
0
 def add_options(self, parser):
     """Register the parse-command options after the base command options."""
     ScrapyCommand.add_options(self, parser)
     # (flags, keyword arguments) pairs, registered in this order.
     option_table = [
         (("--spider",), dict(dest="spider", default=None,
                              help="use this spider without looking for one")),
         (("-a",), dict(dest="spargs", action="append", default=[], metavar="NAME=VALUE",
                        help="set spider argument (may be repeated)")),
         (("--pipelines",), dict(action="store_true",
                                 help="process items through pipelines")),
         (("--nolinks",), dict(dest="nolinks", action="store_true",
                               help="don't show links to follow (extracted requests)")),
         (("--noitems",), dict(dest="noitems", action="store_true",
                               help="don't show scraped items")),
         (("--nocolour",), dict(dest="nocolour", action="store_true",
                                help="avoid using pygments to colorize the output")),
         (("-r", "--rules"), dict(dest="rules", action="store_true",
                                  help="use CrawlSpider rules to discover the callback")),
         (("-c", "--callback"), dict(
             dest="callback",
             help="use this callback for parsing, instead looking for a callback")),
         (("-m", "--meta"), dict(
             dest="meta",
             help="inject extra meta into the Request, it must be a valid raw json string")),
         (("--cbkwargs",), dict(
             dest="cbkwargs",
             help="inject extra callback kwargs into the Request, it must be a valid raw json string")),
         (("-d", "--depth"), dict(
             dest="depth", type="int", default=1,
             help="maximum depth for parsing requests [default: %default]")),
         (("-v", "--verbose"), dict(dest="verbose", action="store_true",
                                    help="print each depth level one by one")),
     ]
     for flags, keywords in option_table:
         parser.add_option(*flags, **keywords)
Example #18
0
 def process_options(self, args, opts):
     """Parse the ``-a`` values and, when ``-o`` was given, set the ``FEEDS`` setting.

     Raises:
         UsageError: when ``arglist_to_dict`` raises ``ValueError`` for the
             collected ``-a`` values.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         spider_args = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
     opts.spargs = spider_args
     if not opts.output:
         return
     feed_settings = feed_process_params_from_cli(self.settings, opts.output, opts.output_format)
     self.settings.set('FEEDS', feed_settings, priority='cmdline')
Example #19
0
 def process_options(self, args, opts):
     """Parse the ``-a`` values and copy ``opts.urlqueue`` into the settings.

     Only the ``arglist_to_dict`` call is guarded, so a ``ValueError`` raised
     while updating the settings is not reported as a bad ``-a`` value.

     Raises:
         UsageError: when ``arglist_to_dict`` raises ``ValueError`` for the
             collected ``-a`` values.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE",
                          print_help=False)
     if opts.urlqueue:
         # Stored at 'cmdline' priority under the 'Redis_key' setting name.
         self.settings.set('Redis_key', opts.urlqueue, priority='cmdline')
Example #20
0
 def add_options(self, parser):
     """Add the travel options: mode, date, start station and end station."""
     ScrapyCommand.add_options(self, parser)
     # NOTE(review): "-s" may also be registered by ScrapyCommand.add_options;
     # confirm which option keeps the flag under the parser's conflict handler.
     add = parser.add_option
     add(
         "-m",
         "--travelmode",
         dest="travelmode",
         default="飞机",
         type="str",
         action="store",
         help="travel mode",
     )
     # The default travel date is the local date at the time options are registered.
     today = time.strftime("%Y-%m-%d", time.localtime())
     add(
         "-t",
         "--traveltime",
         dest="traveltime",
         default=today,
         type="str",
         action="store",
         help="travel time",
     )
     add(
         "-s",
         "--startstation",
         dest="startstation",
         default="北京",
         type="str",
         action="store",
         help="start city",
     )
     add(
         "-e",
         "--endstation",
         dest="endstation",
         default="上海",
         type="str",
         action="store",
         help="end city",
     )
Example #21
0
 def add_options(self, parser):
     """Register the template options: list, edit, dump, template and force."""
     ScrapyCommand.add_options(self, parser)
     # (flags, keyword arguments) pairs, registered in this order.
     template_options = (
         (("-l", "--list"), dict(dest="list", action="store_true",
                                 help="List available templates")),
         (("-e", "--edit"), dict(dest="edit", action="store_true",
                                 help="Edit spider after creating it")),
         (("-d", "--dump"), dict(dest="dump", metavar="TEMPLATE",
                                 help="Dump template to standard output")),
         (("-t", "--template"), dict(dest="template", default="basic",
                                     help="Uses a custom template.")),
         (("--force",), dict(dest="force", action="store_true",
                             help="If the spider already exists, overwrite it with the template")),
     )
     for flags, keywords in template_options:
         parser.add_option(*flags, **keywords)
Example #22
0
 def add_options(self, parser):
     """Add ``--list``, ``--edit``, ``--dump``, ``--template`` and ``--force``."""
     ScrapyCommand.add_options(self, parser)
     add = parser.add_option
     add("-l", "--list", dest="list", action="store_true", help="List available templates")
     add("-e", "--edit", dest="edit", action="store_true", help="Edit spider after creating it")
     add("-d", "--dump", dest="dump", metavar="TEMPLATE", help="Dump template to standard output")
     add("-t", "--template", dest="template", default="basic", help="Uses a custom template.")
     add(
         "--force",
         dest="force",
         action="store_true",
         help="If the spider already exists, overwrite it with the template",
     )
Example #23
0
 def add_options(self, parser):
     """Add one ``--get<kind>`` option per supported interpretation of a setting."""
     ScrapyCommand.add_options(self, parser)
     # Maps the option suffix to the tail of its help text, in registration order.
     interpretations = [
         ("", "print raw setting value"),
         ("bool", "print setting value, interpreted as a boolean"),
         ("int", "print setting value, interpreted as an integer"),
         ("float", "print setting value, interpreted as a float"),
         ("list", "print setting value, interpreted as a list"),
     ]
     for suffix, help_text in interpretations:
         dest = "get" + suffix
         parser.add_option("--" + dest, dest=dest, metavar="SETTING", help=help_text)
Example #24
0
 def add_options(self, parser):
     """Register the parse-command options after the base command options."""
     ScrapyCommand.add_options(self, parser)
     add = parser.add_option
     add("--spider", dest="spider", default=None,
         help="use this spider without looking for one")
     add("-a", dest="spargs", action="append", default=[], metavar="NAME=VALUE",
         help="set spider argument (may be repeated)")
     add("--pipelines", action="store_true",
         help="process items through pipelines")
     add("--nolinks", dest="nolinks", action="store_true",
         help="don't show links to follow (extracted requests)")
     add("--noitems", dest="noitems", action="store_true",
         help="don't show scraped items")
     add("--nocolour", dest="nocolour", action="store_true",
         help="avoid using pygments to colorize the output")
     add("-r", "--rules", dest="rules", action="store_true",
         help="use CrawlSpider rules to discover the callback")
     add("-c", "--callback", dest="callback",
         help="use this callback for parsing, instead looking for a callback")
     add("-d", "--depth", dest="depth", type="int", default=1,
         help="maximum depth for parsing requests [default: %default]")
     add("-v", "--verbose", dest="verbose", action="store_true",
         help="print each depth level one by one")
Example #25
0
 def add_options(self, parser):
     """Register the JSON Pointer options and ``--truncate``."""
     ScrapyCommand.add_options(self, parser)
     add = parser.add_option
     add('-p', '--package-pointer', help='The JSON Pointer to the value in the package')
     add('-r', '--release-pointer', help='The JSON Pointer to the value in the release')
     add('-t', '--truncate', type=int, help='Truncate the value to this number of characters')
Example #26
0
 def add_options(self, parser):
     """Register ``-a`` (spider arguments) and ``-i`` (incremental flag)."""
     ScrapyCommand.add_options(self, parser)
     spider_args_option = dict(dest="spargs", action="append", default=[], metavar="NAME=VALUE",
                               help="set spider argument (may be repeated)")
     parser.add_option("-a", **spider_args_option)
     incremental_option = dict(dest="incremental", action="store_true", default=False,
                               help="enable incremental crawl")
     parser.add_option("-i", **incremental_option)
Example #27
0
 def add_options(self, parser):
     """Register ``-l/--list`` and ``-v/--verbose`` after the base options."""
     ScrapyCommand.add_options(self, parser)
     add = parser.add_option
     add("-l", "--list", dest="list", action="store_true",
         help="only list contracts, without checking them")
     add("-v", "--verbose", dest="verbose", default=False, action='store_true',
         help="print contract tests for all spiders")
Example #28
0
 def add_options(self, parser):
     """Register the ``--mongo_uri`` and ``--postgres_uri`` connection options."""
     ScrapyCommand.add_options(self, parser)
     # (option name, help text, default); the option name doubles as dest.
     connection_options = (
         ("mongo_uri",
          "connection string for MongoDB containing Strava data",
          "mongodb://localhost:27017/strava"),
         ("postgres_uri",
          "connection string for PostgreSQL to put Strava data into",
          "postgresql:///strava"),
     )
     for name, help_text, default_uri in connection_options:
         parser.add_option("--" + name, dest=name, metavar="URI", help=help_text,
                           default=default_uri)
Example #29
0
 def add_options(self, parser):
     """Register ``--spider``, ``--headers`` and ``--no-redirect``."""
     ScrapyCommand.add_options(self, parser)
     add = parser.add_option
     add("--spider", dest="spider", help="use this spider")
     add("--headers", dest="headers", action="store_true",
         help="print response HTTP headers instead of body")
     add("--no-redirect", dest="no_redirect", action="store_true", default=False,
         help="do not handle HTTP 3xx status codes and print response as-is")
Example #30
0
 def add_options(self, parser):
     """Register the JSON Pointer options, ``--truncate`` and ``--max-bytes``."""
     ScrapyCommand.add_options(self, parser)
     # (flags, keyword arguments) pairs, registered in this order.
     option_table = (
         (('-p', '--package-pointer'),
          {'help': 'The JSON Pointer to the value in the package'}),
         (('-r', '--release-pointer'),
          {'help': 'The JSON Pointer to the value in the release'}),
         (('-t', '--truncate'),
          {'type': int, 'help': 'Truncate the value to this number of characters'}),
         (('--max-bytes',),
          {'type': int, 'help': 'Stop downloading an OCDS file after reading this many bytes'}),
     )
     for flags, keywords in option_table:
         parser.add_option(*flags, **keywords)
Example #31
0
    def add_options(self, parser):
        """Register the crawl options followed by the page, good-only, filter and see_lz options."""
        ScrapyCommand.add_options(self, parser)
        # NOTE(review): "-s" may also be registered by ScrapyCommand.add_options;
        # confirm which option keeps the flag under the parser's conflict handler.
        # (flags, keyword arguments) pairs, registered in this order.
        option_table = (
            (("-a",), dict(dest="spargs", action="append", default=[], metavar="NAME=VALUE",
                           help="set spider argument (may be repeated)")),
            (("-o", "--output"), dict(metavar="FILE",
                                      help="dump scraped items into FILE (use - for stdout)")),
            (("-t", "--output-format"), dict(metavar="FORMAT",
                                             help="format to use for dumping items with -o")),
            (("-p", "--pages"), dict(nargs=2, type="int", dest="pages", default=[],
                                     help="set the range of pages you want to crawl")),
            (("-g", "--good"), dict(action="store_true", dest="good_only", default=False,
                                    help="only crawl good threads and their posts and comments")),
            (("-f", "--filter"), dict(type="str", dest="filter", default="",
                                      help='set function name in "filter.py" to filter threads')),
            (("-s", "--see_lz"), dict(action="store_true", dest="see_lz", default=False,
                                      help='enable "only see lz" mode')),
        )
        for flags, keywords in option_table:
            parser.add_option(*flags, **keywords)
Example #32
0
 def add_options(self, parser):
     """Register ``-a``, ``-o/--output`` and ``-t/--output-format``.

     The ``--output`` help text reads "FILE (use - for stdout)"; the previous
     wording "FILE(user - for stdout)" was a typo.
     """
     ScrapyCommand.add_options(self, parser)
     parser.add_option('-a',
                       dest='spargs',
                       action='append',
                       default=[],
                       metavar='NAME=VALUE',
                       help='set spider argument (may be repeated)')
     parser.add_option(
         '-o',
         '--output',
         metavar='FILE',
         help='dump scraped items into FILE (use - for stdout)')
     parser.add_option('-t',
                       '--output-format',
                       metavar='FORMAT',
                       help='format to use for dumping items with -o')
Example #33
0
class CommandSettings(unittest.TestCase):
    """Checks how ``ScrapyCommand`` processes settings passed with ``-s``."""

    def setUp(self):
        """Create a command with fresh settings and a parser holding its options."""
        self.command = ScrapyCommand()
        self.command.settings = Settings()
        help_formatter = optparse.TitledHelpFormatter()
        self.parser = optparse.OptionParser(formatter=help_formatter, conflict_handler='resolve')
        self.command.add_options(self.parser)

    def test_settings_json_string(self):
        """A JSON ``FEEDS`` value given via ``-s`` becomes a ``BaseSettings`` with the parsed content."""
        raw_feeds = '{"data.json": {"format": "json"}, "data.xml": {"format": "xml"}}'
        cli_args = ['-s', 'FEEDS=' + raw_feeds, 'spider.py']
        opts, args = self.parser.parse_args(args=cli_args)
        self.command.process_options(args, opts)
        feeds_setting = self.command.settings['FEEDS']
        self.assertIsInstance(feeds_setting, scrapy.settings.BaseSettings)
        self.assertEqual(dict(feeds_setting), json.loads(raw_feeds))
Example #34
0
    def add_options(self, parser):
        """Register the crawl options plus the custom ``--tid``, ``--pages`` and ``--dirpath``.

        The short flag ``-t`` is registered once, for ``--tid``. It used to be
        passed for ``--output-format`` as well. optparse rejects that
        duplicate under its default conflict handler, and under ``'resolve'``
        the later registration (``--tid``) takes the flag anyway. The output
        format is therefore long-option only: ``--output-format FORMAT``.
        """
        ScrapyCommand.add_options(self, parser)
        # Standard crawl options:
        #   -a NAME=VALUE passes a spider argument, e.g. -a url='www.baidu.com'
        #   -o / --output-format select the output file and its format, e.g.
        #   cmdline.execute("scrapy crawl lianxi -o info.csv --output-format csv".split())
        parser.add_option("-a",
                          dest="spargs",
                          action="append",
                          default=[],
                          metavar="NAME=VALUE",
                          help="set spider argument (may be repeated)")
        parser.add_option(
            "-o",
            "--output",
            metavar="FILE",
            help="dump scraped items into FILE (use - for stdout)")
        # No "-t" here: that short flag belongs to --tid below.
        parser.add_option("--output-format",
                          metavar="FORMAT",
                          help="format to use for dumping items with -o")

        # Custom options follow. In optparse, ``dest`` names the attribute set
        # on the parsed options object, ``nargs`` is the number of values the
        # option consumes, and action="store_true" makes a flag take no value.
        parser.add_option("-t",
                          "--tid",
                          nargs=1,
                          dest="tid",
                          default=None,
                          help="设定爬取帖子的tid")
        parser.add_option("-p",
                          "--pages",
                          nargs=2,
                          type="int",
                          dest="pages",
                          default=[],
                          help="设定爬取贴吧或者某个帖子的页数范围")
        parser.add_option("-d",
                          "--dirpath",
                          type="str",
                          dest="dir_path",
                          default="",
                          help='设定爬取的json文件存放路径')
Example #35
0
 def add_options(self, parser):
     """Add this command's options, taking each default from ``config``."""
     ScrapyCommand.add_options(self, parser)
     # Short flags are single characters, so "-K" is used rather than "-key".
     add = parser.add_option
     add(
         "-K",
         "--keyword",
         type="str",
         dest="keyword",
         action='store',
         default=json.dumps(config.keyword),
         help="设置爬取分类/关键字",
     )
     add(
         "-I",
         "--watermark_img_path",
         type="str",
         dest="watermark_img_path",
         default=config.watermark_img_path,
         help="",
     )
     add(
         "-T",
         "--watermark_text",
         type="str",
         dest="watermark_text",
         default=config.watermark_text,
         help="",
     )
     add("-S", "--timeout", type="int", dest="timeout", default=config.timeout, help="")
     add(
         "-V",
         "--video_path",
         type="str",
         dest="video_path",
         default=config.VIDEO_SAVE_PATH,
         help="",
     )
     add(
         "-F",
         "--files_store",
         type="str",
         dest="files_store",
         default=config.FILES_STORE,
         help="",
     )
Example #36
0
 def add_options(self, parser):
     """Register the ``--Cache``, ``--Storage``, ``--temp`` and ``--All`` flags."""
     ScrapyCommand.add_options(self, parser)
     # (option name, help text); the option name doubles as dest.
     clear_flags = (
         ("Cache", "Clear the Cache Directory"),
         ("Storage", "Clear the Storage Directory"),
         ("temp", "Clear temporary Directory"),
         ("All", "Clear the Cache,Storage and temporary Directory"),
     )
     for name, help_text in clear_flags:
         parser.add_option("--" + name, dest=name, action="store_true", help=help_text)
Example #37
0
    def process_options(self, args, opts):
        """Translate the parsed command line into settings entries.

        Resets ``opts.spargs`` to an empty dict, stores a second positional
        argument as ``RSS``, takes over logging, stores the first ``-o`` value
        as ``OUTPUT``, removes ``FEEDS``, records the arguments and options
        under ``CMDLINE_ARGS`` and, with ``--verbose``, switches on
        ``VERBOSE``, ``LOG_VIOLATIONS`` and ``STATS_DUMP``.
        """
        ScrapyCommand.process_options(self, args, opts)

        opts.spargs = {}

        # With exactly two positional arguments the last one is removed
        # from ``args`` and stored as the RSS setting.
        if len(args) == 2:
            rss_value = args.pop()
            self.settings['RSS'] = rss_value

        self._takeover_logging()

        if opts.output:
            first_output = opts.output[0]
            self.settings['OUTPUT'] = first_output
        self.settings.pop('FEEDS')

        cmdline_snapshot = dict(args=args, opts=vars(opts))
        self.settings['CMDLINE_ARGS'] = cmdline_snapshot

        if not opts.verbose:
            return
        self.settings['VERBOSE'] = True
        self.settings.set('LOG_VIOLATIONS', True, priority='cmdline')
        self.settings.set('STATS_DUMP', True, priority='cmdline')
Example #38
0
 def process_options(self, args, opts):
     """Parse the ``-a`` values and derive ``FEED_URI`` / ``FEED_FORMAT`` from ``-o`` / ``-t``.

     When no format is given, the extension of the output name is used.

     Raises:
         UsageError: when the ``-a`` values are rejected by
             ``arglist_to_dict``, or when the output format is not a key of
             the ``FEED_EXPORTERS`` setting.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
     if not opts.output:
         return

     # "-" selects standard output.
     feed_uri = 'stdout:' if opts.output == '-' else opts.output
     self.settings.set('FEED_URI', feed_uri, priority='cmdline')

     exporters = without_none_values(self.settings._getcomposite('FEED_EXPORTERS'))
     known_formats = exporters.keys()
     if not opts.output_format:
         extension = os.path.splitext(opts.output)[1]
         opts.output_format = extension.replace(".", "")
     if opts.output_format not in known_formats:
         message = ("Unrecognized output format '%s', set one"
                    " using the '-t' switch or as a file extension"
                    " from the supported list %s")
         raise UsageError(message % (opts.output_format, tuple(known_formats)))
     self.settings.set('FEED_FORMAT', opts.output_format, priority='cmdline')
Example #39
0
 def add_options(self, parser):
     """Register the ``--verbose`` / ``-v`` flag after the base command options."""
     ScrapyCommand.add_options(self, parser)
     verbose_flag = {
         "dest": "verbose",
         "action": "store_true",
         "help": "also display twisted/python/platform info (useful for bug reports)",
     }
     parser.add_option("--verbose", "-v", **verbose_flag)
Example #40
0
 def add_options(self, parser):
     """Register ``-c`` (code to evaluate) and ``--spider``."""
     ScrapyCommand.add_options(self, parser)
     # (flag, dest, help text), registered in this order.
     for flag, dest, help_text in (
         ("-c", "code", "evaluate the code in the shell, print the result and exit"),
         ("--spider", "spider", "use this spider"),
     ):
         parser.add_option(flag, dest=dest, help=help_text)
Example #41
0
 def add_options(self, parser):
     """Register ``--spider`` and the ``--headers`` flag."""
     ScrapyCommand.add_options(self, parser)
     add = parser.add_option
     add("--spider", dest="spider", help="use this spider")
     add(
         "--headers",
         dest="headers",
         action="store_true",
         help="print response HTTP headers instead of body",
     )
Example #42
0
    def process_options(self, args, opts):
        """Run the base option processing, then this command's own option handlers.

        ``opts`` is passed to ``process_spider_arguments`` and then to
        ``process_request_meta``, in that order.
        """
        ScrapyCommand.process_options(self, args, opts)

        self.process_spider_arguments(opts)
        self.process_request_meta(opts)
Example #43
0
 def add_options(self, parser):
     """Register the contract-check flags ``-l/--list`` and ``-v/--verbose``."""
     ScrapyCommand.add_options(self, parser)
     list_flag = dict(dest="list", action="store_true",
                      help="only list contracts, without checking them")
     parser.add_option("-l", "--list", **list_flag)
     verbose_flag = dict(dest="verbose", default=False, action='store_true',
                         help="print contract tests for all spiders")
     parser.add_option("-v", "--verbose", **verbose_flag)
Example #44
0
    def process_options(self, args, opts):
        """Run the base option processing, then replace ``opts.spargs`` with a dict.

        Any exception raised by ``arglist_to_dict`` propagates to the caller.
        """
        ScrapyCommand.process_options(self, args, opts)
        parsed_spargs = arglist_to_dict(opts.spargs)
        opts.spargs = parsed_spargs
Example #45
0
 def process_options(self, args, opts):
     """Run the base option processing, then replace ``opts.spargs`` with a dict.

     An invalid ``-a`` value is reported as a usage error. It used to be
     printed only, which left ``opts.spargs`` as the unparsed list.

     Raises:
         UsageError: when ``arglist_to_dict`` raises ``ValueError`` for the
             collected ``-a`` values.
     """
     # Imported here so the method does not depend on a module-level import.
     from scrapy.exceptions import UsageError

     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
Example #46
0
 def add_options(self, parser):
     """Register only the options provided by ``ScrapyCommand.add_options``."""
     ScrapyCommand.add_options(self, parser)
Example #47
0
 def add_options(self, parser):
     """Register the ``--spider`` option after the base command options."""
     ScrapyCommand.add_options(self, parser)
     spider_option = {"dest": "spider", "help": "use this spider"}
     parser.add_option("--spider", **spider_option)