def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError(
            "Invalid -a value, use -a NAME=VALUE", print_help=False)
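# For reference, a minimal sketch of what the arglist_to_dict helper above
# does (behavior assumed from scrapy.utils.conf): it splits each "NAME=VALUE"
# entry on the first "=" and builds a dict; an entry with no "=" makes dict()
# raise ValueError, which is the error the UsageError above reports.
def arglist_to_dict_sketch(arglist):
    # ["name=jobs", "depth=2"] -> {"name": "jobs", "depth": "2"}
    return dict(x.split("=", 1) for x in arglist)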
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("--get", dest="get", metavar="SETTING",
                      help="print raw setting value")
    parser.add_option("--getbool", dest="getbool", metavar="SETTING",
                      help="print setting value, interpreted as a boolean")
    parser.add_option("--getint", dest="getint", metavar="SETTING",
                      help="print setting value, interpreted as an integer")
    parser.add_option("--getfloat", dest="getfloat", metavar="SETTING",
                      help="print setting value, interpreted as a float")
    parser.add_option("--getlist", dest="getlist", metavar="SETTING",
                      help="print setting value, interpreted as a list")
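# Typical invocations of Scrapy's settings command with the options above
# (BOT_NAME and HTTPCACHE_ENABLED are standard Scrapy settings):
#   scrapy settings --get BOT_NAME
#   scrapy settings --getbool HTTPCACHE_ENABLED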
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option('--dry-run', action='store_true',
                      help='Runs the spiders without writing any files')
    parser.add_option('--sample', type=int,
                      help='The number of files to write')
def add_options(self, parser):
    parser.usage = "usage: scrapy spiderdocs [<module.name>] [-o <filename.md>]"
    ScrapyCommand.add_options(self, parser)
    parser.add_option("-o", "--output", dest="output_filename", metavar="FILE",
                      help="Output file name.")
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    arg = parser.add_option
    arg('-o', '--output', help='prefix for charts (without ".html")')
    arg('--step', type=float, default=30, help='time step, s')
    arg('--smooth', type=int, default=50, help='smooth span')
    arg('--top', type=int, default=30, help='top domains to show')
    arg('--no-show', action='store_true', help="don't show charts")
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("-a", dest="spargs", action="append", default=[],
                      metavar="NAME=VALUE",
                      help="set spider argument (may be repeated)")
    parser.add_option("-o", "--output", metavar="FILE",
                      help="dump scraped items into FILE (use - for stdout)")
    parser.add_option("-t", "--output-format", metavar="FORMAT",
                      help="format to use for dumping items with -o")
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("--spider", dest="spider",
                      help="use this spider")
    parser.add_option("--headers", dest="headers", action="store_true",
                      help="print response HTTP headers instead of body")
    parser.add_option("--no-redirect", dest="no_redirect", action="store_true",
                      default=False,
                      help="do not handle HTTP 3xx status codes and print response as-is")
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("-c", dest="code",
                      help="evaluate the code in the shell, print the result and exit")
    parser.add_option("--spider", dest="spider",
                      help="use this spider")
    parser.add_option("--no-redirect", dest="no_redirect", action="store_true",
                      default=False,
                      help="do not handle HTTP 3xx status codes and print response as-is")
def setUp(self):
    self.command = ScrapyCommand()
    self.command.settings = Settings()
    self.parser = optparse.OptionParser(
        formatter=optparse.TitledHelpFormatter(),
        conflict_handler='resolve',
    )
    self.command.add_options(self.parser)
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        "--postgres_uri", dest="postgres_uri", metavar="URI",
        help="connection string for PostgreSQL to put Strava data into",
        default="postgresql:///strava")
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        "--all", dest="all", action="store_true",
        help="Run validation on all scrapers",
    )
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    if args:
        self._locations[args[0]] = opts.output_filename
    else:
        locations = self.settings.get('SPIDERDOCS_LOCATIONS', None)
        if locations:
            self._locations = locations
        else:
            raise UsageError("Module name is required.", print_help=False)
def process_options(self, args, opts):
    # Process the options passed in from the command line
    ScrapyCommand.process_options(self, args, opts)
    # print(self.settings.__dict__)
    # if not os.path.exists(os.path.dirname(self.settings.attributes.get('LOG_FILE').value)):
    #     os.makedirs(os.path.dirname(self.settings.attributes.get('LOG_FILE').value))
    # Load the default configuration
    # self.parse_default_args()
    self.parse_from_cmdline(args, opts)
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        "--verbose", "-v", dest="verbose", action="store_true",
        help="also display twisted/python/platform info (useful for bug reports)")
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("--spider", dest="spider", default=None,
                      help="use this spider without looking for one")
    parser.add_option("-a", dest="spargs", action="append", default=[],
                      metavar="NAME=VALUE",
                      help="set spider argument (may be repeated)")
    parser.add_option("--pipelines", action="store_true",
                      help="process items through pipelines")
    parser.add_option("--nolinks", dest="nolinks", action="store_true",
                      help="don't show links to follow (extracted requests)")
    parser.add_option("--noitems", dest="noitems", action="store_true",
                      help="don't show scraped items")
    parser.add_option("--nocolour", dest="nocolour", action="store_true",
                      help="avoid using pygments to colorize the output")
    parser.add_option("-r", "--rules", dest="rules", action="store_true",
                      help="use CrawlSpider rules to discover the callback")
    parser.add_option("-c", "--callback", dest="callback",
                      help="use this callback for parsing, instead of looking for a callback")
    parser.add_option("-m", "--meta", dest="meta",
                      help="inject extra meta into the Request, it must be a valid raw json string")
    parser.add_option("--cbkwargs", dest="cbkwargs",
                      help="inject extra callback kwargs into the Request, it must be a valid raw json string")
    parser.add_option("-d", "--depth", dest="depth", type="int", default=1,
                      help="maximum depth for parsing requests [default: %default]")
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                      help="print each depth level one by one")
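# A hypothetical invocation of Scrapy's parse command exercising the options
# above (the URL, spider name, and meta payload are made up):
#   scrapy parse http://example.com/page --spider=myspider -c parse_item \
#       -m '{"key": "value"}' -d 2 -v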
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE",
                         print_help=False)
    if opts.output:
        feeds = feed_process_params_from_cli(self.settings, opts.output,
                                             opts.output_format)
        self.settings.set('FEEDS', feeds, priority='cmdline')
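# Hedged illustration of the result: for "-o items.json" with no -t,
# feed_process_params_from_cli is expected to return a mapping shaped like
# Scrapy's FEEDS setting, e.g. {"items.json": {"format": "json"}} (shape
# assumed from the FEEDS documentation, not verified against this codebase).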
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
        if opts.urlqueue:
            self.settings.set('Redis_key', opts.urlqueue, priority='cmdline')
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE",
                         print_help=False)
def add_options(self, parser):
    '''Add command-line options.'''
    ScrapyCommand.add_options(self, parser)
    parser.add_option("-m", "--travelmode", dest="travelmode",
                      default="飞机",  # "airplane"
                      type="str", action="store", help="travel mode")
    parser.add_option("-t", "--traveltime", dest="traveltime",
                      default=time.strftime("%Y-%m-%d", time.localtime()),
                      type="str", action="store", help="travel time")
    parser.add_option("-s", "--startstation", dest="startstation",
                      default="北京",  # "Beijing"
                      type="str", action="store", help="start city")
    parser.add_option("-e", "--endstation", dest="endstation",
                      default="上海",  # "Shanghai"
                      type="str", action="store", help="end city")
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("-l", "--list", dest="list", action="store_true",
                      help="List available templates")
    parser.add_option("-e", "--edit", dest="edit", action="store_true",
                      help="Edit spider after creating it")
    parser.add_option("-d", "--dump", dest="dump", metavar="TEMPLATE",
                      help="Dump template to standard output")
    parser.add_option("-t", "--template", dest="template", default="basic",
                      help="Uses a custom template.")
    parser.add_option("--force", dest="force", action="store_true",
                      help="If the spider already exists, overwrite it with the template")
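# A hypothetical invocation of Scrapy's genspider command using the options
# above (spider name and domain are made up):
#   scrapy genspider --force -t crawl myspider example.com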
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("--spider", dest="spider", default=None,
                      help="use this spider without looking for one")
    parser.add_option("-a", dest="spargs", action="append", default=[],
                      metavar="NAME=VALUE",
                      help="set spider argument (may be repeated)")
    parser.add_option("--pipelines", action="store_true",
                      help="process items through pipelines")
    parser.add_option("--nolinks", dest="nolinks", action="store_true",
                      help="don't show links to follow (extracted requests)")
    parser.add_option("--noitems", dest="noitems", action="store_true",
                      help="don't show scraped items")
    parser.add_option("--nocolour", dest="nocolour", action="store_true",
                      help="avoid using pygments to colorize the output")
    parser.add_option("-r", "--rules", dest="rules", action="store_true",
                      help="use CrawlSpider rules to discover the callback")
    parser.add_option("-c", "--callback", dest="callback",
                      help="use this callback for parsing, instead of looking for a callback")
    parser.add_option("-d", "--depth", dest="depth", type="int", default=1,
                      help="maximum depth for parsing requests [default: %default]")
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                      help="print each depth level one by one")
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option('-p', '--package-pointer',
                      help='The JSON Pointer to the value in the package')
    parser.add_option('-r', '--release-pointer',
                      help='The JSON Pointer to the value in the release')
    parser.add_option('-t', '--truncate', type=int,
                      help='Truncate the value to this number of characters')
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("-a", dest="spargs", action="append", default=[],
                      metavar="NAME=VALUE",
                      help="set spider argument (may be repeated)")
    parser.add_option("-i", dest="incremental", action="store_true",
                      default=False, help="enable incremental crawl")
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("-l", "--list", dest="list", action="store_true",
                      help="only list contracts, without checking them")
    parser.add_option("-v", "--verbose", dest="verbose", default=False,
                      action='store_true',
                      help="print contract tests for all spiders")
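# Typical invocations of Scrapy's check command with the options above:
#   scrapy check -l        # list contracts without running them
#   scrapy check -v        # print contract tests for all spiders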
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        "--mongo_uri", dest="mongo_uri", metavar="URI",
        help="connection string for MongoDB containing Strava data",
        default="mongodb://localhost:27017/strava")
    parser.add_option(
        "--postgres_uri", dest="postgres_uri", metavar="URI",
        help="connection string for PostgreSQL to put Strava data into",
        default="postgresql:///strava")
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("--spider", dest="spider",
                      help="use this spider")
    parser.add_option("--headers", dest="headers", action="store_true",
                      help="print response HTTP headers instead of body")
    parser.add_option(
        "--no-redirect", dest="no_redirect", action="store_true",
        default=False,
        help="do not handle HTTP 3xx status codes and print response as-is")
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option('-p', '--package-pointer',
                      help='The JSON Pointer to the value in the package')
    parser.add_option('-r', '--release-pointer',
                      help='The JSON Pointer to the value in the release')
    parser.add_option('-t', '--truncate', type=int,
                      help='Truncate the value to this number of characters')
    parser.add_option('--max-bytes', type=int,
                      help='Stop downloading an OCDS file after reading this many bytes')
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("-a", dest="spargs", action="append", default=[],
                      metavar="NAME=VALUE",
                      help="set spider argument (may be repeated)")
    parser.add_option("-o", "--output", metavar="FILE",
                      help="dump scraped items into FILE (use - for stdout)")
    parser.add_option("-t", "--output-format", metavar="FORMAT",
                      help="format to use for dumping items with -o")
    parser.add_option("-p", "--pages", nargs=2, type="int", dest="pages",
                      default=[],
                      help="set the range of pages you want to crawl")
    parser.add_option("-g", "--good", action="store_true", dest="good_only",
                      default=False,
                      help="only crawl good threads and their posts and comments")
    parser.add_option("-f", "--filter", type="str", dest="filter", default="",
                      help='set function name in "filter.py" to filter threads')
    parser.add_option("-s", "--see_lz", action="store_true", dest="see_lz",
                      default=False, help='enable "only see lz" mode')
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option('-a', dest='spargs', action='append', default=[],
                      metavar='NAME=VALUE',
                      help='set spider argument (may be repeated)')
    parser.add_option('-o', '--output', metavar='FILE',
                      help='dump scraped items into FILE (use - for stdout)')
    parser.add_option('-t', '--output-format', metavar='FORMAT',
                      help='format to use for dumping items with -o')
class CommandSettings(unittest.TestCase):

    def setUp(self):
        self.command = ScrapyCommand()
        self.command.settings = Settings()
        self.parser = optparse.OptionParser(
            formatter=optparse.TitledHelpFormatter(),
            conflict_handler='resolve',
        )
        self.command.add_options(self.parser)

    def test_settings_json_string(self):
        feeds_json = '{"data.json": {"format": "json"}, "data.xml": {"format": "xml"}}'
        opts, args = self.parser.parse_args(
            args=['-s', f'FEEDS={feeds_json}', 'spider.py'])
        self.command.process_options(args, opts)
        self.assertIsInstance(self.command.settings['FEEDS'],
                              scrapy.settings.BaseSettings)
        self.assertEqual(dict(self.command.settings['FEEDS']),
                         json.loads(feeds_json))
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("-a", dest="spargs", action="append", default=[],
                      metavar="NAME=VALUE",
                      help="set spider argument (may be repeated)")
    parser.add_option("-o", "--output", metavar="FILE",
                      help="dump scraped items into FILE (use - for stdout)")
    parser.add_option("-t", "--output-format", metavar="FORMAT",
                      help="format to use for dumping items with -o")
    # Notes:
    # -a is the usual way to pass spider arguments, e.g. -a url='www.baidu.com'
    # -o and -t set the output target and format, e.g.:
    # cmdline.execute("scrapy crawl lianxi -o info.csv -t csv".split())
    # The options below are custom ones:
    # dest is the attribute name under which the value appears on opts
    # nargs is the number of values the option consumes
    # action="store_true" means the flag takes no value and stores True when present
    # --tid has no short flag: "-t" is already taken by --output-format above
    parser.add_option("--tid", nargs=1, dest="tid", default=None,
                      help="set the tid of the thread to crawl")
    parser.add_option("-p", "--pages", nargs=2, type="int", dest="pages",
                      default=[],
                      help="set the page range of the forum or thread to crawl")
    parser.add_option("-d", "--dirpath", type="str", dest="dir_path",
                      default="",
                      help="set the directory for storing crawled JSON files")
def add_options(self, parser):
    # Add options for the command
    ScrapyCommand.add_options(self, parser)
    # The short flag passed to add_option may only be a single character,
    # so "-key" below is invalid:
    # parser.add_option("-key", "--keyword", type="str", dest="keyword",
    #                   action='store', default=json.dumps(config.keyword),
    #                   help="set the crawl categories/keywords")
    parser.add_option("-K", "--keyword", type="str", dest="keyword",
                      action='store', default=json.dumps(config.keyword),
                      help="set the crawl categories/keywords")
    parser.add_option("-I", "--watermark_img_path", type="str",
                      dest="watermark_img_path",
                      default=config.watermark_img_path, help="")
    parser.add_option("-T", "--watermark_text", type="str",
                      dest="watermark_text",
                      default=config.watermark_text, help="")
    parser.add_option("-S", "--timeout", type="int", dest="timeout",
                      default=config.timeout, help="")
    parser.add_option("-V", "--video_path", type="str", dest="video_path",
                      default=config.VIDEO_SAVE_PATH, help="")
    parser.add_option("-F", "--files_store", type="str", dest="files_store",
                      default=config.FILES_STORE, help="")
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("--Cache", dest="Cache", action="store_true",
                      help="Clear the Cache directory")
    parser.add_option("--Storage", dest="Storage", action="store_true",
                      help="Clear the Storage directory")
    parser.add_option("--temp", dest="temp", action="store_true",
                      help="Clear the temporary directory")
    parser.add_option("--All", dest="All", action="store_true",
                      help="Clear the Cache, Storage and temporary directories")
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    opts.spargs = {}
    if len(args) == 2:
        self.settings['RSS'] = args.pop()
    self._takeover_logging()
    if opts.output:
        self.settings['OUTPUT'] = opts.output[0]
        self.settings.pop('FEEDS')
    self.settings['CMDLINE_ARGS'] = {'args': args, 'opts': vars(opts)}
    if opts.verbose:
        self.settings['VERBOSE'] = True
        self.settings.set('LOG_VIOLATIONS', True, priority='cmdline')
        self.settings.set('STATS_DUMP', True, priority='cmdline')
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE",
                         print_help=False)
    if opts.output:
        if opts.output == '-':
            self.settings.set('FEED_URI', 'stdout:', priority='cmdline')
        else:
            self.settings.set('FEED_URI', opts.output, priority='cmdline')
        feed_exporters = without_none_values(
            self.settings._getcomposite('FEED_EXPORTERS'))
        valid_output_formats = feed_exporters.keys()
        if not opts.output_format:
            opts.output_format = os.path.splitext(opts.output)[1].replace(".", "")
        if opts.output_format not in valid_output_formats:
            raise UsageError("Unrecognized output format '%s', set one"
                             " using the '-t' switch or as a file extension"
                             " from the supported list %s" %
                             (opts.output_format, tuple(valid_output_formats)))
        self.settings.set('FEED_FORMAT', opts.output_format, priority='cmdline')
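# A runnable sketch of the extension fallback above: when -t is omitted, the
# format is derived by stripping the dot from the output file's extension.
import os
assert os.path.splitext("items.json")[1].replace(".", "") == "json"
assert os.path.splitext("items")[1].replace(".", "") == ""  # no extension -> ""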
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("--verbose", "-v", dest="verbose", action="store_true",
                      help="also display twisted/python/platform info (useful for bug reports)")
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("-c", dest="code",
                      help="evaluate the code in the shell, print the result and exit")
    parser.add_option("--spider", dest="spider",
                      help="use this spider")
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("--spider", dest="spider",
                      help="use this spider")
    parser.add_option("--headers", dest="headers", action="store_true",
                      help="print response HTTP headers instead of body")
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    self.process_spider_arguments(opts)
    self.process_request_meta(opts)
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    opts.spargs = arglist_to_dict(opts.spargs)
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        # Abort with a usage error rather than printing a message and
        # continuing with an invalid spargs list
        raise UsageError("Invalid -a value, use -a NAME=VALUE",
                         print_help=False)
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)
    parser.add_option("--spider", dest="spider",
                      help="use this spider")