def add_options(self, parser):
     """Register command options: Postgres URI, spider args, start-ID source.

     Fix: ``-a`` uses ``action="append"``, which appends parsed values to
     the option's default object; the original ``default={}`` would raise
     ``AttributeError`` (dict has no ``append``) the first time ``-a`` was
     used. A list default is required, as in the other commands.
     """
     ScrapyCommand.add_options(self, parser)
     parser.add_option(
         "--postgres_uri",
         dest="postgres_uri",
         metavar="URI",
         help="connection string for PostgreSQL to put data into",
         default="postgresql:///strava")
     # action="append" needs a list default; values accumulate into it.
     parser.add_option("-a",
                       dest="spargs",
                       action="append",
                       default=[],
                       metavar="NAME=VALUE",
                       help="set spider argument (may be repeated)")
     parser.add_option(
         "-m",
         "--max",
         action="store_true",
         dest="max",
         help="start crawling from the largest ID found in the DB")
     parser.add_option(
         "-s",
         "--start",
         dest="start_val",
         default=None,
         metavar="ID",
         type="int",
         help="start crawling from the ID equal to this value")
Example #2
0
 def add_options(self, parser):
     """Extend the base command options with shell-specific flags."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("-c",
         dest="code",
         help="evaluate the code in the shell, print the result and exit")
     opt("--spider", dest="spider", help="use this spider")
Example #3
0
    def add_options(self, parser):
        """Register crawl options plus city and sign-date filters."""
        ScrapyCommand.add_options(self, parser)
        opt = parser.add_option
        # Repeatable -a NAME=VALUE spider arguments.
        opt("-a", dest="spargs", action="append", default=[],
            metavar="NAME=VALUE", help="set spider argument (may be repeated)")
        opt("-o", "--output", metavar="FILE",
            help="dump scraped items into FILE (use - for stdout)")
        opt("-t", "--output-format", metavar="FORMAT",
            help="format to use for dumping items with -o")
        # City name to crawl (user-facing help kept in original language).
        opt("-c", "--city", type="str", dest="city", default="",
            help="设置要爬取的城市名,如 北京市")
        # Two values: the signing start date and end date.
        opt("-d", "--dates", nargs=2, type="str", dest="sign_date",
            default=["", ""], help="设置要爬取的签约起始日期和结束如期")
Example #4
0
 def add_options(self, parser):
     """Template-management options for spider generation."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("-l", "--list", dest="list", action="store_true",
         help="List available templates")
     opt("-e", "--edit", dest="edit", action="store_true",
         help="Edit spider after creating it")
     opt("-d", "--dump", dest="dump", metavar="TEMPLATE",
         help="Dump template to standard output")
     opt("-t", "--template", dest="template", default="basic",
         help="Uses a custom template.")
     opt("--force", dest="force", action="store_true",
         help="If the spider already exists, overwrite it with the template")
Example #5
0
 def add_options(self, parser):
     """Fetch-style options: spider choice, header dump, redirect control."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("--spider", dest="spider", help="use this spider")
     opt("--headers",
         dest="headers",
         action="store_true",
         help="print response HTTP headers instead of body")
     opt("--no-redirect",
         dest="no_redirect",
         action="store_true",
         default=False,
         help="do not handle HTTP 3xx status codes and print response as-is")
Example #6
0
 def add_options(self, parser):
     """RabbitMQ consumer options: queue name, run mode, prefetch count."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     # Queue name is validated/processed by a callback on this command.
     opt("-q", "--queue", type="str", dest="queue_name",
         help="Queue name to consume messages",
         action="callback", callback=self.queue_option_callback)
     # Mode is restricted to the command's declared action modes.
     opt("-m", "--mode", type="choice", choices=self.action_modes,
         default="action", dest="mode",
         help="Command run mode: action for one time execution and exit or worker")
     opt("-p", "--prefetch_count", type="int", default=None,
         dest="prefetch_count",
         help="RabbitMQ consumer prefetch count setting")
Example #7
0
 def add_options(self, parser):
     """Flags selecting which sub-program of this command to run."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     # Run the validator once.
     opt("-r", dest="validate", default=False, action="store_true",
         help="run validate")
     # Run the validator in a loop.
     opt("-l", dest="circle", default=False, action="store_true",
         help="run circle validate")
     # Run the crawler.
     opt("-c", dest="crawl", default=False, action="store_true",
         help="run spider crawl")
     # Run the RPC server.
     opt("--rpc", dest="rpc", default=False, action="store_true",
         help="run rpc server")
     # Test the RPC server.
     opt("--rpc-test", dest="rpc_test", default=False, action="store_true",
         help="test rpc server")
 def add_options(self, parser):
     """Booking.com crawl options: location, dates, proxy, concurrency.

     Fix: the three concurrency options (``--cr``, ``--crpd``, ``--crpip``)
     declare integer defaults but had no ``type``, so values supplied on
     the command line arrived as ``str`` while the default was ``int``;
     ``type="int"`` makes the parsed value type uniform.
     """
     ScrapyCommand.add_options(self, parser)
     parser.add_option(
         "--city",
         dest="city",
         help=
         "City code. Must be an integer, according to internal Booking.com id for location"
     )
     parser.add_option("--checkin",
                       dest="checkin",
                       help="Checkin date in ISO (YYYY-MM-DD) format")
     parser.add_option("--checkout",
                       dest="checkout",
                       help="Checkout date in ISO (YYYY-MM-DD) format")
     parser.add_option("-p",
                       "--proxy",
                       dest="proxy",
                       default=False,
                       action='store_true',
                       help="Use proxy servers")
     parser.add_option("--cr",
                       dest="concurrent_requests",
                       type="int",
                       default=16,
                       help="Use maximum concurrent requests (default: 16)")
     parser.add_option(
         "--crpd",
         dest="concurrent_requests_per_domain",
         type="int",
         default=16,
         help="Use concurrent requests per domain (default: 16)")
     parser.add_option("--crpip",
                       dest="concurrent_requests_per_ip",
                       type="int",
                       default=16,
                       help="Use concurrent requests per ip (default: 16)")
Example #9
0
 def add_options(self, parser):
     """Parse-command options: spider selection, output control, depth."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("--spider", dest="spider", default=None,
         help="use this spider without looking for one")
     opt("-a", dest="spargs", action="append", default=[],
         metavar="NAME=VALUE", help="set spider argument (may be repeated)")
     opt("--pipelines", action="store_true",
         help="process items through pipelines")
     opt("--nolinks", dest="nolinks", action="store_true",
         help="don't show links to follow (extracted requests)")
     opt("--noitems", dest="noitems", action="store_true",
         help="don't show scraped items")
     opt("--nocolour", dest="nocolour", action="store_true",
         help="avoid using pygments to colorize the output")
     opt("-r", "--rules", dest="rules", action="store_true",
         help="use CrawlSpider rules to discover the callback")
     opt("-c", "--callback", dest="callback",
         help="use this callback for parsing, instead looking for a callback")
     opt("-m", "--meta", dest="meta",
         help="inject extra meta into the Request, it must be a valid raw json string")
     opt("--cbkwargs", dest="cbkwargs",
         help="inject extra callback kwargs into the Request, it must be a valid raw json string")
     opt("-d", "--depth", dest="depth", type="int", default=1,
         help="maximum depth for parsing requests [default: %default]")
     opt("-v", "--verbose", dest="verbose", action="store_true",
         help="print each depth level one by one")
Example #10
0
class CommandSettings(unittest.TestCase):
    """Tests for ScrapyCommand option handling (argparse-based)."""

    def setUp(self):
        self.command = ScrapyCommand()
        self.command.settings = Settings()
        self.parser = argparse.ArgumentParser(
            conflict_handler='resolve', formatter_class=ScrapyHelpFormatter)
        self.command.add_options(self.parser)

    def test_settings_json_string(self):
        # A JSON value passed via -s must be parsed into BaseSettings.
        feeds_json = '{"data.json": {"format": "json"}, "data.xml": {"format": "xml"}}'
        opts, args = self.parser.parse_known_args(
            args=['-s', f'FEEDS={feeds_json}', 'spider.py'])
        self.command.process_options(args, opts)
        feeds = self.command.settings['FEEDS']
        self.assertIsInstance(feeds, scrapy.settings.BaseSettings)
        self.assertEqual(dict(feeds), json.loads(feeds_json))

    def test_help_formatter(self):
        # Headings get over/underline decoration from the custom formatter.
        formatter = ScrapyHelpFormatter(prog='scrapy')
        part_strings = [
            'usage: scrapy genspider [options] <name> <domain>\n\n', '\n',
            'optional arguments:\n', '\n', 'Global Options:\n'
        ]
        expected = (
            'Usage\n=====\n  scrapy genspider [options] <name> <domain>\n\n\n'
            'Optional Arguments\n==================\n\n'
            'Global Options\n--------------\n')
        self.assertEqual(formatter._join_parts(part_strings), expected)
Example #11
0
 def add_options(self, parser):
     """Crawl options plus keyword and link filters."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("-a", dest="spargs", action="append", default=[],
         metavar="NAME=VALUE", help="set spider argument (may be repeated)")
     opt("-o", "--output", metavar="FILE",
         help="dump scraped items into FILE (use - for stdout)")
     opt("-t", "--output-format", metavar="FORMAT",
         help="format to use for dumping items with -o")
     opt("-k", "--keyword", metavar="KEYWORD", type="str", dest="keyword",
         default="", help="set keyword")
     opt("-l", "--link", metavar="LINK", type="str", dest="link",
         default="", help="set link")
    def add_options(self, parser):
        """Crawl options plus a page range and a thread filter."""
        ScrapyCommand.add_options(self, parser)
        opt = parser.add_option
        opt("-a", dest="spargs", action="append", default=[],
            metavar="NAME=VALUE", help="set spider argument (may be repeated)")
        opt("-o", "--output", metavar="FILE",
            help="dump scraped items into FILE (use - for stdout)")
        opt("-t", "--output-format", metavar="FORMAT",
            help="format to use for dumping items with -o")
        # Two ints: first and last page of the range to crawl.
        opt("-p", "--pages", nargs=2, type="int", dest="pages", default=[],
            help="set the range of pages you want to crawl")
        opt("-f", "--filter", type="str", dest="filter", default="",
            help='set function name in "filter.py" to filter threads')
Example #13
0
 def add_options(self, parser):
     """Forum-crawl options: page range, good-only, filter, see-lz mode."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("-a", dest="spargs", action="append", default=[],
         metavar="NAME=VALUE", help="set spider argument (may be repeated)")
     opt("-p", "--pages", nargs=2, type="int", dest="pages", default=[],
         help="set the range of pages you want to crawl")
     opt("-g", "--good", action="store_true", dest="good_only",
         default=False,
         help="only crawl good threads and their posts and comments")
     opt("-f", "--filter", type="str", dest="filter", default="",
         help='set function name in "filter.py" to filter threads')
     opt("-s", "--see_lz", action="store_true", dest="see_lz",
         default=False, help='enable "only see lz" mode')
Example #14
0
 def add_options(self, parser):
     """Settings-inspection options: typed getters for a setting value."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("--get", dest="get", metavar="SETTING",
         help="print raw setting value")
     opt("--getbool", dest="getbool", metavar="SETTING",
         help="print setting value, interpreted as a boolean")
     opt("--getint", dest="getint", metavar="SETTING",
         help="print setting value, interpreted as an integer")
     opt("--getfloat", dest="getfloat", metavar="SETTING",
         help="print setting value, interpreted as a float")
     opt("--getlist", dest="getlist", metavar="SETTING",
         help="print setting value, interpreted as a list")
Example #15
0
 def add_options(self, parser):
     """Typed getters for printing a single setting value."""
     ScrapyCommand.add_options(self, parser)
     # Each flag's dest is the flag name without the leading dashes.
     for flag, text in (
             ("--get", "print raw setting value"),
             ("--getbool", "print setting value, interpreted as a boolean"),
             ("--getint", "print setting value, interpreted as an integer"),
             ("--getfloat", "print setting value, interpreted as a float"),
             ("--getlist", "print setting value, interpreted as a list"),
     ):
         parser.add_option(flag, dest=flag[2:], metavar="SETTING", help=text)
 def add_options(self, parser):
     """Add the PostgreSQL connection-string option."""
     ScrapyCommand.add_options(self, parser)
     parser.add_option("--postgres_uri",
                       dest="postgres_uri",
                       default="postgresql:///strava",
                       metavar="URI",
                       help="connection string for PostgreSQL to put Strava data into")
Example #17
0
 def add_options(self, parser):
     """Shell options: inline code, spider choice, redirect handling."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("-c", dest="code",
         help="evaluate the code in the shell, print the result and exit")
     opt("--spider", dest="spider", help="use this spider")
     opt("--no-redirect", dest="no_redirect", action="store_true",
         default=False,
         help="do not handle HTTP 3xx status codes and print response as-is")
Example #18
0
 def add_options(self, parser):
     """Set a custom usage string, then add the output-file option."""
     parser.usage = "usage: scrapy spiderdocs [<module.name>] [-o <filename.md>]"
     ScrapyCommand.add_options(self, parser)
     parser.add_option("-o", "--output", dest="output_filename",
                       metavar="FILE", help="Output file name.")
Example #19
0
 def add_options(self, parser):
     """Fetch options: spider choice, header dump, redirect handling."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("--spider", dest="spider", help="use this spider")
     opt("--headers", dest="headers", action="store_true",
         help="print response HTTP headers instead of body")
     opt("--no-redirect", dest="no_redirect", action="store_true",
         default=False,
         help="do not handle HTTP 3xx status codes and print response as-is")
Example #20
0
 def add_options(self, parser):
     """Standard crawl options: spider args and item output."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("-a",
         dest="spargs",
         action="append",
         default=[],
         metavar="NAME=VALUE",
         help="set spider argument (may be repeated)")
     opt("-o", "--output", metavar="FILE",
         help="dump scraped items into FILE (use - for stdout)")
     opt("-t", "--output-format", metavar="FORMAT",
         help="format to use for dumping items with -o")
Example #21
0
 def add_options(self, parser):
     """Add a flag to run validation on every scraper."""
     ScrapyCommand.add_options(self, parser)
     parser.add_option("--all", dest="all", action="store_true",
                       help="Run validation on all scrapers")
Example #22
0
 def add_options(self, parser):
     """Shell options: inline code, spider, redirect handling."""
     ScrapyCommand.add_options(self, parser)
     parser.add_option("-c", dest="code",
                       help="evaluate the code in the shell, print the result and exit")
     parser.add_option("--spider", dest="spider", help="use this spider")
     parser.add_option("--no-redirect", dest="no_redirect",
                       action="store_true", default=False,
                       help="do not handle HTTP 3xx status codes and print response as-is")
Example #23
0
 def add_options(self, parser):
     """Chart-generation options: output prefix, smoothing, display."""
     ScrapyCommand.add_options(self, parser)
     parser.add_option('-o', '--output',
                       help='prefix for charts (without ".html")')
     parser.add_option('--step', type=float, default=30,
                       help='time step, s')
     parser.add_option('--smooth', type=int, default=50,
                       help='smooth span')
     parser.add_option('--top', type=int, default=30,
                       help='top domains to show')
     parser.add_option('--no-show', action='store_true',
                       help='don\'t show charts')
Example #24
0
 def add_options(self, parser):
     """Dry-run and sampling controls for the spiders."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt('--dry-run', action='store_true',
         help='Runs the spiders without writing any files')
     opt('--sample', type=int,
         help='The number of files to write')
Example #25
0
 def add_options(self, parser):
     """Verbose flag for extended version/platform output."""
     ScrapyCommand.add_options(self, parser)
     parser.add_option(
         "--verbose", "-v", dest="verbose", action="store_true",
         help="also display twisted/python/platform info (useful for bug reports)")
Example #26
0
 def add_options(self, parser):
     """Crawl options plus a maximum page count."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("-a", dest="spargs", action="append", default=[],
         metavar="NAME=VALUE", help="set spider argument (may be repeated)")
     opt("-o", "--output", metavar="FILE",
         help="dump scraped items into FILE (use - for stdout)")
     opt("-t", "--output-format", metavar="FORMAT",
         help="format to use for dumping items with -o")
     # Very large default effectively means "no page limit".
     opt("-p", "--page", type="int", dest="page", default=9999999,
         help="set the num of pages you want to crawl")
Example #27
0
 def add_options(self, parser):
     """Parse-command options: spider, output suppression, depth."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("--spider", dest="spider", default=None,
         help="use this spider without looking for one")
     opt("-a", dest="spargs", action="append", default=[],
         metavar="NAME=VALUE", help="set spider argument (may be repeated)")
     opt("--pipelines", action="store_true",
         help="process items through pipelines")
     opt("--nolinks", dest="nolinks", action="store_true",
         help="don't show links to follow (extracted requests)")
     opt("--noitems", dest="noitems", action="store_true",
         help="don't show scraped items")
     opt("--nocolour", dest="nocolour", action="store_true",
         help="avoid using pygments to colorize the output")
     opt("-r", "--rules", dest="rules", action="store_true",
         help="use CrawlSpider rules to discover the callback")
     opt("-c", "--callback", dest="callback",
         help="use this callback for parsing, instead looking for a callback")
     opt("-d", "--depth", dest="depth", type="int", default=1,
         help="maximum depth for parsing requests [default: %default]")
     opt("-v", "--verbose", dest="verbose", action="store_true",
         help="print each depth level one by one")
Example #28
0
 def add_options(self, parser):
     """Travel-search options: mode, date, start and end stations."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("-m", "--travelmode", dest="travelmode", default="飞机",
         type="str", action="store", help="travel mode")
     # Defaults to today's date, formatted YYYY-MM-DD.
     opt("-t", "--traveltime", dest="traveltime",
         default=time.strftime("%Y-%m-%d", time.localtime()),
         type="str", action="store", help="travel time")
     opt("-s", "--startstation", dest="startstation", default="北京",
         type="str", action="store", help="start city")
     opt("-e", "--endstation", dest="endstation", default="上海",
         type="str", action="store", help="end city")
Example #29
0
 def add_options(self, parser):
     """Genspider options: templates, editing, dumping, force overwrite."""
     ScrapyCommand.add_options(self, parser)
     parser.add_option("-l", "--list", dest="list",
                       action="store_true",
                       help="List available templates")
     parser.add_option("-e", "--edit", dest="edit",
                       action="store_true",
                       help="Edit spider after creating it")
     parser.add_option("-d", "--dump", dest="dump",
                       metavar="TEMPLATE",
                       help="Dump template to standard output")
     parser.add_option("-t", "--template", dest="template",
                       default="basic",
                       help="Uses a custom template.")
     parser.add_option("--force", dest="force",
                       action="store_true",
                       help="If the spider already exists, overwrite it with the template")
Example #30
0
 def add_options(self, parser):
     """JSON-pointer extraction options."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt('-p', '--package-pointer',
         help='The JSON Pointer to the value in the package')
     opt('-r', '--release-pointer',
         help='The JSON Pointer to the value in the release')
     opt('-t', '--truncate', type=int,
         help='Truncate the value to this number of characters')
Example #31
0
 def add_options(self, parser):
     """Spider arguments plus an incremental-crawl switch."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("-a", dest="spargs", action="append", default=[],
         metavar="NAME=VALUE", help="set spider argument (may be repeated)")
     opt("-i", dest="incremental", action="store_true", default=False,
         help="enable incremental crawl")
Example #32
0
 def add_options(self, parser):
     """Contract-check options: list-only and verbose modes."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("-l", "--list", dest="list", action="store_true",
         help="only list contracts, without checking them")
     opt("-v", "--verbose", dest="verbose", default=False,
         action='store_true', help="print contract tests for all spiders")
Example #33
0
 def add_options(self, parser):
     """Source (MongoDB) and destination (PostgreSQL) connection URIs."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("--mongo_uri", dest="mongo_uri", metavar="URI",
         default="mongodb://localhost:27017/strava",
         help="connection string for MongoDB containing Strava data")
     opt("--postgres_uri", dest="postgres_uri", metavar="URI",
         default="postgresql:///strava",
         help="connection string for PostgreSQL to put Strava data into")
Example #34
0
 def add_options(self, parser):
     """Standard crawl options (spider args, output file and format)."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt('-a', dest='spargs', action='append', default=[],
         metavar='NAME=VALUE', help='set spider argument (may be repeated)')
     opt('-o', '--output', metavar='FILE',
         help='dump scraped items into FILE(user - for stdout)')
     opt('-t', '--output-format', metavar='FORMAT',
         help='format to use for dumping items with -o')
Example #35
0
class CommandSettings(unittest.TestCase):
    """Tests for ScrapyCommand option handling (optparse-based)."""

    def setUp(self):
        self.command = ScrapyCommand()
        self.command.settings = Settings()
        self.parser = optparse.OptionParser(
            conflict_handler='resolve',
            formatter=optparse.TitledHelpFormatter(),
        )
        self.command.add_options(self.parser)

    def test_settings_json_string(self):
        # A JSON value passed via -s must be parsed into BaseSettings.
        feeds_json = '{"data.json": {"format": "json"}, "data.xml": {"format": "xml"}}'
        opts, args = self.parser.parse_args(
            args=['-s', f'FEEDS={feeds_json}', 'spider.py'])
        self.command.process_options(args, opts)
        feeds = self.command.settings['FEEDS']
        self.assertIsInstance(feeds, scrapy.settings.BaseSettings)
        self.assertEqual(dict(feeds), json.loads(feeds_json))
Example #36
0
 def add_options(self, parser):
     """JSON-pointer extraction options plus a download byte cap."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt('-p', '--package-pointer',
         help='The JSON Pointer to the value in the package')
     opt('-r', '--release-pointer',
         help='The JSON Pointer to the value in the release')
     opt('-t', '--truncate', type=int,
         help='Truncate the value to this number of characters')
     opt('--max-bytes', type=int,
         help='Stop downloading an OCDS file after reading this many bytes')
Example #37
0
 def add_options(self, parser):
     """Shell options: inline code and spider selection."""
     ScrapyCommand.add_options(self, parser)
     opt = parser.add_option
     opt("-c", dest="code",
         help="evaluate the code in the shell, print the result and exit")
     opt("--spider", dest="spider", help="use this spider")
Example #38
0
 def add_options(self, parser):
   """Fetch options: spider selection and header dump."""
   ScrapyCommand.add_options(self, parser)
   opt = parser.add_option
   opt("--spider", dest="spider", help="use this spider")
   opt("--headers", dest="headers", action="store_true",
       help="print response HTTP headers instead of body")
Example #39
0
 def add_options(self, parser):
     """Contract-check options: list-only and verbose modes."""
     ScrapyCommand.add_options(self, parser)
     parser.add_option("-l", "--list", dest="list",
                       action="store_true",
                       help="only list contracts, without checking them")
     parser.add_option("-v", "--verbose", dest="verbose",
                       default=False, action='store_true',
                       help="print contract tests for all spiders")
Example #40
0
 def add_options(self, parser):
     """Only the base ScrapyCommand options; no command-specific flags."""
     ScrapyCommand.add_options(self, parser)
Example #41
0
 def add_options(self, parser):
     """Add only the spider-selection option."""
     ScrapyCommand.add_options(self, parser)
     parser.add_option("--spider", dest="spider", help="use this spider")
Example #42
0
 def add_options(self, parser):
     """Verbose flag for extended version/platform output."""
     ScrapyCommand.add_options(self, parser)
     parser.add_option(
         "--verbose", "-v", dest="verbose", action="store_true",
         help="also display twisted/python/platform info (useful for bug reports)")